aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.txt11
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c2
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/netfilter.h45
-rw-r--r--include/linux/netfilter_ingress.h4
-rw-r--r--include/net/netfilter/nf_conntrack.h3
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h5
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h45
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h36
-rw-r--r--include/net/netfilter/nf_conntrack_timeout.h4
-rw-r--r--include/net/netfilter/nf_queue.h2
-rw-r--r--include/net/netfilter/nf_tables.h10
-rw-r--r--include/net/netfilter/nf_tables_core.h2
-rw-r--r--include/net/netlink.h1
-rw-r--r--include/net/netns/netfilter.h2
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h17
-rw-r--r--lib/nlattr.c24
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/bridge/netfilter/ebt_ip.c4
-rw-r--r--net/bridge/netfilter/ebt_ip6.c2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c2
-rw-r--r--net/bridge/netfilter/ebtables.c13
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c40
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c12
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c2
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c57
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c20
-rw-r--r--net/ipv6/ila/ila_xlat.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c42
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c17
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c29
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/core.c351
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c46
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c44
-rw-r--r--net/netfilter/nf_conntrack_expect.c66
-rw-r--r--net/netfilter/nf_conntrack_helper.c34
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c106
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c90
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c15
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c19
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c19
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c17
-rw-r--r--net/netfilter/nf_conntrack_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_standalone.c97
-rw-r--r--net/netfilter/nf_internals.h10
-rw-r--r--net/netfilter/nf_queue.c68
-rw-r--r--net/netfilter/nf_sockopt.c2
-rw-r--r--net/netfilter/nf_tables_api.c128
-rw-r--r--net/netfilter/nf_tables_core.c28
-rw-r--r--net/netfilter/nf_tables_trace.c42
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c22
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c21
-rw-r--r--net/netfilter/nft_exthdr.c213
-rw-r--r--net/netfilter/nft_fib_netdev.c87
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_rt.c73
-rw-r--r--net/netfilter/nft_set_rbtree.c49
-rw-r--r--net/netfilter/x_tables.c14
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c4
-rw-r--r--net/netfilter/xt_addrtype.c3
-rw-r--r--net/netfilter/xt_connlimit.c26
-rw-r--r--net/netfilter/xt_hashlimit.c8
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/openvswitch/conntrack.c4
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--security/smack/smack_netfilter.c2
96 files changed, 1443 insertions, 896 deletions
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index 497d668288f9..433b6724797a 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -96,17 +96,6 @@ nf_conntrack_max - INTEGER
Size of connection tracking table. Default value is
nf_conntrack_buckets value * 4.
-nf_conntrack_default_on - BOOLEAN
- 0 - don't register conntrack in new net namespaces
- 1 - register conntrack in new net namespaces (default)
-
- This controls wheter newly created network namespaces have connection
- tracking enabled by default. It will be enabled automatically
- regardless of this setting if the new net namespace requires
- connection tracking, e.g. when NAT rules are created.
- This setting is only visible in initial user namespace, it has no
- effect on existing namespaces.
-
nf_conntrack_tcp_be_liberal - BOOLEAN
0 - disabled (default)
not 0 - enabled
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 58a9f990b553..c74893c1e620 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -15,7 +15,7 @@ struct ipvlan_netns {
unsigned int ipvl_nf_hook_refcnt;
};
-static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
+static const struct nf_hook_ops ipvl_nfops[] = {
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV4,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8aba119bb005..adacc45abec1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1791,7 +1791,7 @@ struct net_device {
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
- struct nf_hook_entry __rcu *nf_hooks_ingress;
+ struct nf_hook_entries __rcu *nf_hooks_ingress;
#endif
unsigned char broadcast[MAX_ADDR_LEN];
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 22f081065d49..f84bca1703cd 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -72,25 +72,32 @@ struct nf_hook_ops {
};
struct nf_hook_entry {
- struct nf_hook_entry __rcu *next;
nf_hookfn *hook;
void *priv;
- const struct nf_hook_ops *orig_ops;
};
-static inline void
-nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops)
-{
- entry->next = NULL;
- entry->hook = ops->hook;
- entry->priv = ops->priv;
- entry->orig_ops = ops;
-}
+struct nf_hook_entries {
+ u16 num_hook_entries;
+ /* padding */
+ struct nf_hook_entry hooks[];
+
+ /* trailer: pointers to original orig_ops of each hook.
+ *
+ * This is not part of struct nf_hook_entry since it's only
+ * needed in slow path (hook register/unregister).
+ *
+ * const struct nf_hook_ops *orig_ops[]
+ */
+};
-static inline int
-nf_hook_entry_priority(const struct nf_hook_entry *entry)
+static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
{
- return entry->orig_ops->priority;
+ unsigned int n = e->num_hook_entries;
+ const void *hook_end;
+
+ hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */
+
+ return (struct nf_hook_ops **)hook_end;
}
static inline int
@@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
return entry->hook(entry->priv, skb, state);
}
-static inline const struct nf_hook_ops *
-nf_hook_entry_ops(const struct nf_hook_entry *entry)
-{
- return entry->orig_ops;
-}
-
static inline void nf_hook_state_init(struct nf_hook_state *p,
unsigned int hook,
u_int8_t pf,
@@ -168,7 +169,7 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#endif
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry);
+ const struct nf_hook_entries *e, unsigned int i);
/**
* nf_hook - call a netfilter hook
@@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
- struct nf_hook_entry *hook_head;
+ struct nf_hook_entries *hook_head;
int ret = 1;
#ifdef HAVE_JUMP_LABEL
@@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
nf_hook_state_init(&state, hook, pf, indev, outdev,
sk, net, okfn);
- ret = nf_hook_slow(skb, &state, hook_head);
+ ret = nf_hook_slow(skb, &state, hook_head, 0);
}
rcu_read_unlock();
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 59476061de86..8d5dae1e2ff8 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -17,7 +17,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
/* caller must hold rcu_read_lock */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
- struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+ struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
struct nf_hook_state state;
int ret;
@@ -30,7 +30,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
nf_hook_state_init(&state, NF_NETDEV_INGRESS,
NFPROTO_NETDEV, skb->dev, NULL, NULL,
dev_net(skb->dev), NULL);
- ret = nf_hook_slow(skb, &state, e);
+ ret = nf_hook_slow(skb, &state, e, 0);
if (ret == 0)
return -1;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 48407569585d..6e6f678aaac7 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
+/* Set all unconfirmed conntrack as dying */
+void nf_ct_unconfirmed_destroy(struct net *);
+
/* Iterate over all conntracks: if iter returns true, it's deleted. */
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 2ba54feaccd8..818def011110 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct);
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data);
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data, u32 portid, int report);
+
/* Allocate space for an expectation: this is mandatory before calling
nf_ct_expect_related. You will have to call put afterwards. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me);
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 6d14b36e3a49..6269deecbee7 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -20,8 +20,8 @@ struct nf_conntrack_l3proto {
/* L3 Protocol Family number. ex) PF_INET */
u_int16_t l3proto;
- /* Protocol name */
- const char *name;
+ /* size of tuple nlattr, fills a hole */
+ u16 nla_size;
/*
* Try to fill in the third arg: nhoff is offset of l3 proto
@@ -37,10 +37,6 @@ struct nf_conntrack_l3proto {
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
- /* Print out the per-protocol part of the tuple. */
- void (*print_tuple)(struct seq_file *s,
- const struct nf_conntrack_tuple *);
-
/*
* Called before tracking.
* *dataoff: offset of protocol header (TCP, UDP,...) in skb
@@ -49,23 +45,17 @@ struct nf_conntrack_l3proto {
int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum);
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
-
- /* Called when netns wants to use connection tracking */
- int (*net_ns_get)(struct net *);
- void (*net_ns_put)(struct net *);
-
- /*
- * Calculate size of tuple nlattr
- */
- int (*nlattr_tuple_size)(void);
-
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
+#endif
- size_t nla_size;
+ /* Called when netns wants to use connection tracking */
+ int (*net_ns_get)(struct net *);
+ void (*net_ns_put)(struct net *);
/* Module (if any) which this is connected to. */
struct module *me;
@@ -73,26 +63,11 @@ struct nf_conntrack_l3proto {
extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
-#ifdef CONFIG_SYSCTL
-/* Protocol pernet registration. */
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto);
-#else
-static inline int nf_ct_l3proto_pernet_register(struct net *n,
- struct nf_conntrack_l3proto *p)
-{
- return 0;
-}
-#endif
-
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto);
-
/* Protocol global registration. */
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
-struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
+const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7032e044bbe2..d4933d56809d 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -61,13 +61,6 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
- /* Print out the per-protocol part of the tuple. Return like seq_* */
- void (*print_tuple)(struct seq_file *s,
- const struct nf_conntrack_tuple *);
-
- /* Print out the private part of the conntrack. */
- void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
-
/* Return the array of timeouts for this protocol. */
unsigned int *(*get_timeouts)(struct net *net);
@@ -92,15 +85,19 @@ struct nf_conntrack_l4proto {
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct {
- size_t obj_size;
int (*nlattr_to_obj)(struct nlattr *tb[],
struct net *net, void *data);
int (*obj_to_nlattr)(struct sk_buff *skb, const void *data);
- unsigned int nlattr_max;
+ u16 obj_size;
+ u16 nlattr_max;
const struct nla_policy *nla_policy;
} ctnl_timeout;
#endif
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ /* Print out the private part of the conntrack. */
+ void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
+#endif
unsigned int *net_id;
/* Init l4proto pernet data */
int (*init_net)(struct net *net, u_int16_t proto);
@@ -108,9 +105,6 @@ struct nf_conntrack_l4proto {
/* Return the per-net protocol part. */
struct nf_proto_net *(*get_net_proto)(struct net *net);
- /* Protocol name */
- const char *name;
-
/* Module (if any) which this is connected to. */
struct module *me;
};
@@ -120,28 +114,28 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
#define MAX_NF_CT_PROTO 256
-struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
u_int8_t l4proto);
-struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
u_int8_t l4proto);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
/* Protocol pernet registration. */
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *proto);
+ const struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *proto);
+ const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *proto[],
+ struct nf_conntrack_l4proto *const proto[],
unsigned int num_proto);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *proto[],
- unsigned int num_proto);
+ struct nf_conntrack_l4proto *const proto[],
+ unsigned int num_proto);
/* Protocol global registration. */
int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
unsigned int num_proto);
void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index d40b89355fdd..483d104fa254 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -16,7 +16,7 @@ struct ctnl_timeout {
refcount_t refcnt;
char name[CTNL_TIMEOUT_NAME_MAX];
__u16 l3num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
char data[0];
};
@@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
static inline unsigned int *
nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 4454719ff849..39468720fc19 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -10,9 +10,9 @@ struct nf_queue_entry {
struct list_head list;
struct sk_buff *skb;
unsigned int id;
+ unsigned int hook_index; /* index in hook_entries->hooks[] */
struct nf_hook_state state;
- struct nf_hook_entry *hook;
u16 size; /* sizeof(entry) + saved route keys */
/* extra space to store route keys */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index bd5be0d691d5..f9795fe394f3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type);
struct nft_set {
struct list_head list;
struct list_head bindings;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
u32 ktype;
u32 dtype;
u32 objtype;
@@ -859,7 +859,7 @@ struct nft_chain {
u16 level;
u8 flags:6,
genmask:2;
- char name[NFT_CHAIN_MAXNAMELEN];
+ char *name;
};
enum nft_chain_type {
@@ -957,7 +957,7 @@ struct nft_table {
u32 use;
u16 flags:14,
genmask:2;
- char name[NFT_TABLE_MAXNAMELEN];
+ char *name;
};
enum nft_af_flags {
@@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
*/
struct nft_object {
struct list_head list;
- char name[NFT_OBJ_MAXNAMELEN];
+ char *name;
struct nft_table *table;
u32 genmask:2,
use:30;
@@ -1272,7 +1272,7 @@ struct nft_trans_set {
struct nft_trans_chain {
bool update;
- char name[NFT_CHAIN_MAXNAMELEN];
+ char *name;
struct nft_stats __percpu *stats;
u8 policy;
};
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 8f690effec37..424684c33771 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -49,6 +49,8 @@ struct nft_payload_set {
};
extern const struct nft_expr_ops nft_payload_fast_ops;
+
+extern struct static_key_false nft_counters_enabled;
extern struct static_key_false nft_trace_enabled;
#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 82dd298b40c7..e51cf5f81597 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -251,6 +251,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
int nla_policy_len(const struct nla_policy *, int);
struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
+char *nla_strdup(const struct nlattr *nla, gfp_t flags);
int nla_memcpy(void *dest, const struct nlattr *src, int count);
int nla_memcmp(const struct nlattr *nla, const void *data, size_t size);
int nla_strcmp(const struct nlattr *nla, const char *str);
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index cea396b53a60..72d66c8763d0 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -16,7 +16,7 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
- struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
bool defrag_ipv4;
#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 683f6f88fcac..b49da72efa68 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,10 +1,11 @@
#ifndef _LINUX_NF_TABLES_H
#define _LINUX_NF_TABLES_H
-#define NFT_TABLE_MAXNAMELEN 32
-#define NFT_CHAIN_MAXNAMELEN 32
-#define NFT_SET_MAXNAMELEN 32
-#define NFT_OBJ_MAXNAMELEN 32
+#define NFT_NAME_MAXLEN 256
+#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN
+#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN
+#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN
+#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN
#define NFT_USERDATA_MAXLEN 256
/**
@@ -731,7 +732,8 @@ enum nft_exthdr_op {
* @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
* @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
* @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32)
- * @NFTA_EXTHDR_OP: option match type (NLA_U8)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U32)
+ * @NFTA_EXTHDR_SREG: source register (NLA_U32)
*/
enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@@ -741,6 +743,7 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_LEN,
NFTA_EXTHDR_FLAGS,
NFTA_EXTHDR_OP,
+ NFTA_EXTHDR_SREG,
__NFTA_EXTHDR_MAX
};
#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1)
@@ -808,11 +811,13 @@ enum nft_meta_keys {
* @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
* @NFT_RT_NEXTHOP4: routing nexthop for IPv4
* @NFT_RT_NEXTHOP6: routing nexthop for IPv6
+ * @NFT_RT_TCPMSS: fetch current path tcp mss
*/
enum nft_rt_keys {
NFT_RT_CLASSID,
NFT_RT_NEXTHOP4,
NFT_RT_NEXTHOP6,
+ NFT_RT_TCPMSS,
};
/**
@@ -1221,6 +1226,8 @@ enum nft_objref_attributes {
enum nft_gen_attributes {
NFTA_GEN_UNSPEC,
NFTA_GEN_ID,
+ NFTA_GEN_PROC_PID,
+ NFTA_GEN_PROC_NAME,
__NFTA_GEN_MAX
};
#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
diff --git a/lib/nlattr.c b/lib/nlattr.c
index ee79b7a3c6b0..927c2f19f119 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -302,6 +302,30 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
EXPORT_SYMBOL(nla_strlcpy);
/**
+ * nla_strdup - Copy string attribute payload into a newly allocated buffer
+ * @nla: attribute to copy the string from
+ * @flags: the type of memory to allocate (see kmalloc).
+ *
+ * Returns a pointer to the allocated buffer or NULL on error.
+ */
+char *nla_strdup(const struct nlattr *nla, gfp_t flags)
+{
+ size_t srclen = nla_len(nla);
+ char *src = nla_data(nla), *dst;
+
+ if (srclen > 0 && src[srclen - 1] == '\0')
+ srclen--;
+
+ dst = kmalloc(srclen + 1, flags);
+ if (dst != NULL) {
+ memcpy(dst, src, srclen);
+ dst[srclen] = '\0';
+ }
+ return dst;
+}
+EXPORT_SYMBOL(nla_strdup);
+
+/**
* nla_memcpy - Copy a netlink attribute into another memory area
* @dest: where to copy to memcpy
* @src: netlink attribute to copy from
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2261e5194c82..c2eea1b8737a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
-static struct nf_hook_ops br_nf_ops[] __read_mostly = {
+static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
@@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
int (*okfn)(struct net *, struct sock *,
struct sk_buff *))
{
- struct nf_hook_entry *elem;
+ const struct nf_hook_entries *e;
struct nf_hook_state state;
+ struct nf_hook_ops **ops;
+ unsigned int i;
int ret;
- for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
- elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
- elem = rcu_dereference(elem->next))
- ;
-
- if (!elem)
+ e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+ if (!e)
return okfn(net, sk, skb);
+ ops = nf_hook_entries_get_hook_ops(e);
+ for (i = 0; i < e->num_hook_entries &&
+ ops[i]->priority <= NF_BR_PRI_BRNF; i++)
+ ;
+
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
- ret = nf_hook_slow(skb, &state, elem);
+ ret = nf_hook_slow(skb, &state, e, i);
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index d06968bdf5ec..2b46c50abce0 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
- return false;
+ return false;
}
if (info->bitmask & EBT_IP_SPORT) {
u32 src = ntohs(pptr->src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
- return false;
+ return false;
}
}
return true;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 4617491be41e..2a5a52a53ec4 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (NF_INVF(info, EBT_IP6_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
- return false;
+ return false;
}
if ((info->bitmask & EBT_IP6_ICMP6) &&
NF_INVF(info, EBT_IP6_ICMP6,
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index f22ef7c21913..45a00dbdbcad 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_filter);
}
-static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_filter[] = {
{
.hook = ebt_in_hook,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 30dedcb56ade..57cd5bb154e7 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_nat);
}
-static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_nat[] = {
{
.hook = ebt_nat_out,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9c6e619f452b..54c7ef4e970e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1069,15 +1069,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- repl->name, AF_BRIDGE, repl->nentries);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ repl->name, AF_BRIDGE, repl->nentries);
}
#endif
return ret;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index aa8ffecc46a4..ab395e55cd78 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
RCV_SKB_FAIL(-EINVAL);
}
-static struct nf_hook_ops dnrmg_ops __read_mostly = {
+static const struct nf_hook_ops dnrmg_ops = {
.hook = dnrmg_hook,
.pf = NFPROTO_DECNET,
.hooknum = NF_DN_ROUTE,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e9d9afd18f7..e04457198f93 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1117,7 +1117,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
int h;
@@ -1132,7 +1131,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
de->target_offset = e->target_offset - (origsize - *size);
t = compat_arpt_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 622ed2887cd5..ce1d97579ce8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -151,7 +151,7 @@ static const char *const comments[] = {
[NF_IP_TRACE_COMMENT_POLICY] = "policy",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -1356,7 +1356,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
int h;
@@ -1375,7 +1374,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index efaa04dcc80e..17b4ca562944 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -625,7 +625,7 @@ arp_mangle(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops cip_arp_ops __read_mostly = {
+static const struct nf_hook_ops cip_arp_ops = {
.hook = arp_mangle,
.pf = NFPROTO_ARP,
.hooknum = NF_ARP_OUT,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f1528f7175a8..811689e523c3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -416,7 +416,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
{
.hook = ipv4_synproxy_hook,
.pf = NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 138a24bc76ad..a1a07b338ccf 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -67,7 +67,7 @@ static unsigned int iptable_nat_ipv4_local_fn(void *priv,
return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
}
-static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
.hook = iptable_nat_ipv4_in,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e14ed11a35c..fe374da4bc13 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void ipv4_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "src=%pI4 dst=%pI4 ",
- &tuple->src.u3.ip, &tuple->dst.u3.ip);
-}
-
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -174,7 +167,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
@@ -303,11 +296,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-
-static int ipv4_nlattr_tuple_size(void)
-{
- return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
-}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -358,16 +346,15 @@ static void ipv4_hooks_unregister(struct net *net)
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
- .name = "ipv4",
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
- .print_tuple = ipv4_print_tuple,
.get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
- .nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
+ .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */
#endif
.net_ns_get = ipv4_hooks_register,
.net_ns_put = ipv4_hooks_unregister,
@@ -398,24 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
static int ipv4_net_init(struct net *net)
{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
- ARRAY_SIZE(builtin_l4proto4));
- if (ret < 0)
- return ret;
- ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: pernet registration failed\n");
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
- ARRAY_SIZE(builtin_l4proto4));
- }
- return ret;
+ return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
}
static void ipv4_net_exit(struct net *net)
{
- nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
@@ -433,6 +408,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
need_conntrack();
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
+ nf_conntrack_l3proto_ipv4.nla_size))
+ return -EINVAL;
+#endif
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 73c591d8a9a8..434b4e20f6db 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void icmp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
static unsigned int *icmp_get_timeouts(struct net *net)
{
return &icmp_pernet(net)->timeout;
@@ -362,10 +352,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
- .name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
- .print_tuple = icmp_print_tuple,
.packet = icmp_packet,
.get_timeouts = icmp_get_timeouts,
.new = icmp_new,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 346bf7ccac08..37fe1616ca0b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -90,7 +90,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv4_defrag_ops[] = {
+static const struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
.pf = NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 2f3895ddc275..df5c2a2061a4 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -25,7 +25,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index c83a9963269b..4388de0e5380 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -24,7 +24,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 574f7ebba0b6..ac8342dcb55e 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
- nated_port)) == 0) {
- /* Save ports */
- info->rtp_port[i][dir] = rtp_port;
- info->rtp_port[i][!dir] = htons(nated_port);
- } else {
+ nated_port))) {
nf_ct_unexpect_related(rtp_exp);
nf_ct_unexpect_related(rtcp_exp);
return -1;
}
+ /* Save ports */
+ info->rtp_port[i][dir] = rtp_port;
+ info->rtp_port[i][!dir] = htons(nated_port);
+
/* Success */
pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
&rtp_exp->tuple.src.u3.ip,
@@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = htons(nated_port);
- } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
&exp->tuple.src.u3.ip,
ntohs(exp->tuple.src.u.tcp.port),
@@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = htons(nated_port);
-
- /* Fix for Gnomemeeting */
- if (idx > 0 &&
- get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
- (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(skb, protoff, data, 0, &taddr[0],
- &ct->tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
- }
- } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
+ /* Fix for Gnomemeeting */
+ if (idx > 0 &&
+ get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir])) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+ }
+
/* Success */
pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
&exp->tuple.src.u3.ip,
@@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
- &ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index de3681df2ce7..e50976e3c213 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dst = &regs->data[priv->dreg];
const struct net_device *dev = NULL;
- const struct iphdr *iph;
+ struct iphdr *iph, _iph;
__be32 addr;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- iph = ip_hdr(pkt->skb);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (priv->flags & NFTA_FIB_F_DADDR)
addr = iph->daddr;
else
@@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dest = &regs->data[priv->dreg];
- const struct iphdr *iph;
+ struct iphdr *iph, _iph;
struct fib_result res;
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
@@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- iph = ip_hdr(pkt->skb);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 77f7f8c7d93d..5bd419c1abc8 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -208,7 +208,7 @@ ila_nf_input(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+static const struct nf_hook_ops ila_nf_hook_ops[] = {
{
.hook = ila_nf_input,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1f90644056ac..9f6644958e5e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -176,7 +176,7 @@ static const char *const comments[] = {
[NF_IP6_TRACE_COMMENT_POLICY] = "policy",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index ce203dd729e0..a5cd43d75393 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -438,7 +438,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
{
.hook = ipv6_synproxy_hook,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 7d2bd940291f..991512576c8c 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -69,7 +69,7 @@ static unsigned int ip6table_nat_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
}
-static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_in,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4e3402486833..fe01dc953c56 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void ipv6_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "src=%pI6 dst=%pI6 ",
- tuple->src.u3.ip6, tuple->dst.u3.ip6);
-}
-
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -191,7 +184,7 @@ static unsigned int ipv6_conntrack_local(void *priv,
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
-static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
{
.hook = ipv6_conntrack_in,
.pf = NFPROTO_IPV6,
@@ -308,11 +301,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-
-static int ipv6_nlattr_tuple_size(void)
-{
- return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
-}
#endif
static int ipv6_hooks_register(struct net *net)
@@ -353,16 +341,15 @@ static void ipv6_hooks_unregister(struct net *net)
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.l3proto = PF_INET6,
- .name = "ipv6",
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
- .print_tuple = ipv6_print_tuple,
.get_l4proto = ipv6_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
- .nlattr_tuple_size = ipv6_nlattr_tuple_size,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
+ .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
#endif
.net_ns_get = ipv6_hooks_register,
.net_ns_put = ipv6_hooks_unregister,
@@ -398,25 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
static int ipv6_net_init(struct net *net)
{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
- ARRAY_SIZE(builtin_l4proto6));
- if (ret < 0)
- return ret;
-
- ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
- ARRAY_SIZE(builtin_l4proto6));
- }
- return ret;
+ return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
}
static void ipv6_net_exit(struct net *net)
{
- nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
@@ -434,6 +408,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
need_conntrack();
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
+ nf_conntrack_l3proto_ipv6.nla_size))
+ return -EINVAL;
+#endif
+
ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d5f028e33f65..43544b975eae 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void icmpv6_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
return &icmpv6_pernet(net)->timeout;
@@ -131,11 +121,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("icmpv6: can't create new conn with type %u\n",
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
- if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
- nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
- NULL, NULL,
- "nf_ct_icmpv6: invalid new with type %d ",
- type + 128);
return false;
}
return true;
@@ -367,10 +352,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
- .name = "icmpv6",
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
- .print_tuple = icmpv6_print_tuple,
.packet = icmpv6_packet,
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index ada60d1a991b..b326da59257f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(void *priv,
return err == 0 ? NF_ACCEPT : NF_DROP;
}
-static struct nf_hook_ops ipv6_defrag_ops[] = {
+static const struct nf_hook_ops ipv6_defrag_ops[] = {
{
.hook = ipv6_defrag,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 97c724224da7..b397a8fe88b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -25,7 +25,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 43f91d9b086c..54b5899543ef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev)
static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
const struct nft_pktinfo *pkt,
- const struct net_device *dev)
+ const struct net_device *dev,
+ struct ipv6hdr *iph)
{
- const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
int lookup_flags = 0;
if (priv->flags & NFTA_FIB_F_DADDR) {
@@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
}
static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
- const struct nft_pktinfo *pkt)
+ const struct nft_pktinfo *pkt,
+ struct ipv6hdr *iph)
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
@@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+ nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
v6ops = nf_get_ipv6_ops();
if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dest = &regs->data[priv->dreg];
+ struct ipv6hdr *iph, _iph;
- *dest = __nft_fib6_eval_type(priv, pkt);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ *dest = __nft_fib6_eval_type(priv, pkt, iph);
}
EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
@@ -141,8 +150,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
+ struct ipv6hdr *iph, _iph;
struct flowi6 fl6 = {
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
@@ -155,7 +166,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
oif = nft_out(pkt);
- lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
This option enables packet forwarding for the "netdev" family.
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
endif # NF_TABLES_NETDEV
endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..d3891c93edd6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..04fe25abc5f6 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num;
+ /*
+  * alloc covers the blob header, num nf_hook_entry slots and num
+  * nf_hook_ops pointers. A request for zero hooks yields NULL, the
+  * same value that is returned when the allocation fails.
+  */
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+/*
+ * accept_all - hook function installed in place of an unregistered hook.
+ * Ignores all of its arguments and always returns NF_ACCEPT.
+ */
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+/*
+ * Placeholder ops. Code in this file compares ops pointers against
+ * &dummy_ops to recognise slots whose hook has been unregistered.
+ */
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+/*
+ * nf_hook_entries_grow - build a new hook blob that also contains @reg
+ *
+ * @old: current blob, may be NULL
+ * @reg: hook ops to add
+ *
+ * Allocates a blob sized for every non-dummy hook in @old plus one,
+ * copies the live hooks across in their existing order and places @reg
+ * in front of the first existing hook whose priority is not lower than
+ * @reg's, or at the end if there is no such hook. Dummy hooks in @old
+ * are dropped. @old itself is neither modified nor freed here.
+ *
+ * Returns the new blob, ERR_PTR(-E2BIG) if the hook count would exceed
+ * MAX_HOOK_COUNT, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1; /* the slot for @reg; live old hooks are counted below */
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
+ i = 0; /* read position in old */
+ nhooks = 0; /* write position in new */
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ /* i is not advanced: the old hook is copied on the next pass */
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) { /* every existing hook has a lower priority value: append */
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+/*
+ * hooks_validate - debug check of the hook ordering in @hooks
+ *
+ * With CONFIG_DEBUG_KERNEL, warns if any non-dummy hook has a lower
+ * priority value than a non-dummy hook stored before it. Compiles to
+ * an empty function otherwise.
+ */
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN; /* highest priority value seen so far */
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * A blob that holds no dummy hooks is left alone without complaint:
+ * nf_unregister_net_hooks() calls this once per unregistered hook, so
+ * when several of those hooks share one blob, the first call already
+ * compacts it and the later calls find nothing left to remove.
+ *
+ * Caller must hold nf_hook_mutex.
+ *
+ * return values:
+ *
+ * Returns the old blob, which the caller must free once readers are
+ * done with it, or NULL if *pp was not replaced (no blob, no dummy
+ * hooks, or the allocation of the smaller blob failed).
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+	struct nf_hook_entries *old, *new = NULL;
+	unsigned int i, j, skip = 0, hook_entries;
+	struct nf_hook_ops **orig_ops;
+	struct nf_hook_ops **new_ops;
+
+	old = nf_entry_dereference(*pp);
+	if (WARN_ON_ONCE(!old))
+		return NULL;
+
+	orig_ops = nf_hook_entries_get_hook_ops(old);
+	for (i = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			skip++;
+	}
+
+	/* if skip == hook_entries all hooks have been removed */
+	hook_entries = old->num_hook_entries;
+	if (skip == hook_entries)
+		goto out_assign;
+
+	/*
+	 * Nothing to reclaim. Not an error: the blob may already have been
+	 * compacted by an earlier call for another hook of the same batch.
+	 * A hook that was never registered is reported by
+	 * __nf_unregister_net_hook() instead.
+	 */
+	if (skip == 0)
+		return NULL;
+
+	hook_entries -= skip;
+	new = allocate_hook_entries_size(hook_entries);
+	if (!new)
+		return NULL;	/* keep the old blob, dummies included */
+
+	new_ops = nf_hook_entries_get_hook_ops(new);
+	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			continue;
+		new->hooks[j] = old->hooks[i];
+		new_ops[j] = (void *)orig_ops[i];
+		j++;
+	}
+	hooks_validate(new);
+out_assign:
+	/* new is NULL here when every hook was removed */
+	rcu_assign_pointer(*pp, new);
+	return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf != NFPROTO_NETDEV)
return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
return &reg->dev->nf_hooks_ingress;
}
#endif
+ WARN_ON_ONCE(1);
return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *entry, *p;
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!pp)
return -EINVAL;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- nf_hook_entry_init(entry, reg);
-
mutex_lock(&nf_hook_mutex);
- /* Find the spot in the list */
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (reg->priority < nf_hook_entry_priority(p))
- break;
- }
- rcu_assign_pointer(entry->next, p);
- rcu_assign_pointer(*pp, entry);
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks))
+ rcu_assign_pointer(*pp, new_hooks);
mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+ synchronize_net();
+ BUG_ON(p == new_hooks);
+ kvfree(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *p;
-
- pp = nf_hook_entry_head(net, reg);
- if (WARN_ON_ONCE(!pp))
- return NULL;
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+ unsigned int i;
- mutex_lock(&nf_hook_mutex);
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (nf_hook_entry_ops(p) == reg) {
- rcu_assign_pointer(*pp, p->next);
- break;
- }
- }
- mutex_unlock(&nf_hook_mutex);
- if (!p) {
- WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return NULL;
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], &dummy_ops);
+ found = true;
+ break;
}
+
+ if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
- net_dec_ingress_queue();
+ if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
-
- return p;
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ }
}
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
unsigned int nfq;
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ __nf_unregister_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
if (!p)
return;
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
nfq = nf_queue_nf_hook_drop(net);
if (nfq)
synchronize_net();
- kfree(p);
+ kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entry *to_free[16];
- unsigned int i, n, nfq;
+ struct nf_hook_entries *to_free[16], *p;
+ struct nf_hook_entries __rcu **pp;
+ unsigned int i, j, n;
+
+ mutex_lock(&nf_hook_mutex);
+ for (i = 0; i < hookcount; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p))
+ continue;
+ __nf_unregister_net_hook(p, &reg[i]);
+ }
+ mutex_unlock(&nf_hook_mutex);
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
- for (i = 0; i < n; i++)
- to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+ mutex_lock(&nf_hook_mutex);
- synchronize_net();
+ for (i = 0, j = 0; i < hookcount && j < n; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (!p)
+ continue;
+
+ to_free[j] = __nf_hook_entries_try_shrink(pp);
+ if (to_free[j])
+ ++j;
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+
+ if (j) {
+ unsigned int nfq;
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
synchronize_net();
- for (i = 0; i < n; i++)
- kfree(to_free[i]);
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < j; i++)
+ kvfree(to_free[i]);
+ }
reg += n;
hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry)
+ const struct nf_hook_entries *e, unsigned int s)
{
unsigned int verdict;
int ret;
- do {
- verdict = nf_hook_entry_hookfn(entry, skb, state);
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
- entry = rcu_dereference(entry->next);
break;
case NF_DROP:
kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
+ ret = nf_queue(skb, state, e, s, verdict);
+ if (ret == 1)
continue;
return ret;
default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
*/
return 0;
}
- } while (entry);
+ }
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..5cb7cac9177d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c2307b6e..4f940d7eb2f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be76d15..3e17d32b629d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4adaddf..e1efa446b305 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -38,7 +38,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -53,7 +52,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,11 +65,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -526,12 +522,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -544,11 +538,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
-out:
+
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 12dc8d5bc37d..121a321b91be 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
}
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
@@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e494e9a88c7f..30e11cd6aa8a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 2eab1e0400f4..90d396814798 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
@@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
@@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
@@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
LeaveFunction(10);
kfree_skb(skb);
- rcu_read_unlock();
return NF_STOLEN;
}
#endif
@@ -1035,7 +1023,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1043,10 +1030,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
rt = skb_rtable(skb);
tdev = rt->dst.dev;
@@ -1095,7 +1080,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1104,7 +1088,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1127,7 +1110,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
@@ -1136,10 +1118,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
@@ -1185,7 +1165,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1194,7 +1173,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1213,17 +1191,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
ip_send_check(ip_hdr(skb));
@@ -1231,14 +1206,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1252,7 +1225,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
@@ -1261,23 +1233,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1322,7 +1290,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
@@ -1368,12 +1335,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
@@ -1414,7 +1379,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
@@ -1460,12 +1424,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4e99cca61612..ecc3ab784633 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
@@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
}
} endfor_ifa(in_dev);
}
- rcu_read_unlock();
if (mask == 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9979f46c81dc..c23df7c9cd59 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include "nf_internals.h"
+
#define NF_CONNTRACK_VERSION "0.5.0"
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -248,8 +250,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -398,7 +400,7 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
@@ -407,13 +409,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -695,7 +694,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -1084,7 +1083,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1177,8 +1176,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1289,8 +1288,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1345,10 +1344,10 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -1689,6 +1688,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
}
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+ might_sleep();
+
+ if (atomic_read(&net->ct.count) > 0) {
+ __nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
+ synchronize_net();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
@@ -1700,14 +1711,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(&net->ct.count) == 0)
return;
- __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
- synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1733,6 +1740,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
}
rtnl_unlock();
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 899c2c36da13..dad2c0c22ad5 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
/* two references : one for hash insert, one for the timer */
refcount_add(2, &exp->use);
- hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
-
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- net->ct.expect_count++;
-
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
}
add_timer(&exp->timeout);
+ hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+ net->ct.expect_count++;
+
NF_CT_STAT_INC(net, expect_create);
}
@@ -474,6 +474,60 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data,
+ u32 portid, int report)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, portid, report);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conntrack_expect *exp;
- const struct hlist_node *next;
- unsigned int i;
+ struct nf_conn_help *help = nfct_help(exp->master);
+ const struct nf_conntrack_helper *me = data;
+ const struct nf_conntrack_helper *this;
+
+ if (exp->helper == me)
+ return true;
+ this = rcu_dereference_protected(help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
mutex_lock(&nf_ct_helper_mutex);
hlist_del_rcu(&me->hnode);
nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
- /* Get rid of expectations */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i], hnode) {
- struct nf_conn_help *help = nfct_help(exp->master);
- if ((rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_expect_lock)
- ) == me || exp->helper == me))
- nf_ct_remove_expect(exp);
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
+ nf_ct_expect_iterate_destroy(expect_iter_me, me); /* data: helper being removed */
nf_ct_iterate_destroy(unhelp, me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..397e6911214f 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7999e70c3bfb..de4053d84364 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
static char __initdata version[] = "0.93";
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -109,9 +109,9 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
int ret;
@@ -535,17 +535,16 @@ nla_put_failure:
static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
- size_t len = 0;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ size_t len;
- rcu_read_lock();
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- len += l3proto->nla_size;
+ len = l3proto->nla_size;
+ len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
- rcu_read_unlock();
return len;
}
@@ -664,7 +663,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -736,8 +734,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
- rcu_read_unlock();
-
nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
@@ -747,7 +743,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -941,8 +936,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1585,8 +1580,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
const struct nlattr * const cda[])
{
const struct nlattr *attr = cda[CTA_PROTOINFO];
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTOINFO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int err = 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2213,7 +2208,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -2272,11 +2266,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
goto nla_put_failure;
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -ENOSPC;
}
@@ -2483,11 +2475,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
- int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
+ int ret;
memset(&m, 0xFF, sizeof(m));
memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
@@ -2661,17 +2653,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -2910,6 +2899,21 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+/* Match expectations whose master's helper is named @data (no helper: no match). */
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+ const struct nf_conntrack_helper *helper;
+ /* nf_ct_expect_iterate_net() calls us under nf_conntrack_expect_lock */
+ helper = rcu_dereference_protected(nfct_help(exp->master)->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return helper && strcmp(helper->name, data) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+ return true;
+}
+
static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const cda[],
@@ -2918,10 +2922,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
- unsigned int i;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2961,49 +2963,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
nf_ct_expect_put(exp);
} else if (cda[CTA_EXPECT_HELP_NAME]) {
char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
- struct nf_conn_help *m_help;
-
- /* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
- m_help = nfct_help(exp->master);
- if (!strcmp(m_help->helper->name, name) &&
- del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_name, name,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
-
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
}
return 0;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6959e93063d4..11562f2a08bb 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- rcu_read_lock();
nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
nf_nat_pptp_expectfn(ct, exp);
@@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("not found\n");
}
}
- rcu_read_unlock();
}
static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad229c3cc..b3e489c859ec 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -65,7 +65,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
}
#endif
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +77,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
/* this is guaranteed to always return a valid protocol helper, since
* it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
struct nf_conntrack_l3proto *p;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
+ const struct nf_conntrack_l3proto *p;
int ret;
- struct nf_conntrack_l3proto *p;
retry: p = nf_ct_l3proto_find_get(l3proto);
if (p == &nf_conntrack_l3proto_generic) {
@@ -173,10 +173,10 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
- struct nf_conntrack_l4proto *p;
+ const struct nf_conntrack_l4proto *p;
rcu_read_lock();
p = __nf_ct_l4proto_find(l3num, l4num);
@@ -188,7 +188,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
{
module_put(p->me);
}
@@ -196,28 +196,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
static int kill_l3proto(struct nf_conn *i, void *data)
{
- return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+ return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
}
static int kill_l4proto(struct nf_conn *i, void *data)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
}
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
{
int ret = 0;
struct nf_conntrack_l3proto *old;
if (proto->l3proto >= NFPROTO_NUMPROTO)
return -EBUSY;
-
- if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (proto->tuple_to_nlattr && proto->nla_size == 0)
return -EINVAL;
-
+#endif
mutex_lock(&nf_ct_proto_mutex);
old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +226,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
goto out_unlock;
}
- if (proto->nlattr_tuple_size)
- proto->nla_size = 3 * proto->nlattr_tuple_size();
-
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
@@ -238,21 +235,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- if (nf_conntrack_default_on == 0)
- return 0;
-
- return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
@@ -266,27 +249,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l3proto, proto);
+ nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- /*
- * nf_conntrack_default_on *might* have registered hooks.
- * ->net_ns_put must cope with more puts() than get(), i.e.
- * if nf_conntrack_default_on was 0 at time of
- * nf_ct_l3proto_pernet_register invocation this net_ns_put()
- * should be a noop.
- */
- if (proto->net_ns_put)
- proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
if (l4proto->get_net_proto) {
/* statically built-in protocols use static per-net */
@@ -301,7 +269,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
@@ -324,8 +292,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
- struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ struct nf_proto_net *pn,
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
@@ -395,7 +363,7 @@ out_unlock:
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nf_proto_net *pn = NULL;
@@ -420,7 +388,7 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -433,7 +401,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
&nf_conntrack_l4proto_generic);
}
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
mutex_lock(&nf_ct_proto_mutex);
__nf_ct_l4proto_unregister_one(l4proto);
@@ -444,7 +412,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
@@ -469,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
- pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
- ver, l4proto[i]->name, ver);
+ pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+ ver, l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -478,7 +446,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
+ struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -490,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
- pr_err("nf_conntrack_%s%d: pernet registration failed\n",
- l4proto[i]->name,
+ pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+ l4proto[i]->l4proto,
l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
@@ -514,8 +482,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
- unsigned int num_proto)
+ struct nf_conntrack_l4proto *const l4proto[],
+ unsigned int num_proto)
{
while (num_proto-- != 0)
nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..188347571fc7 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -623,18 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
return false;
}
-static void dccp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -880,7 +874,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -888,8 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
@@ -916,7 +910,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -924,8 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868bad33a7..2993995b690d 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
- case IPPROTO_SCTP:
- return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
- case IPPROTO_DCCP:
- return false;
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
case IPPROTO_GRE:
return false;
#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
- case IPPROTO_UDPLITE:
- return false;
-#endif
default:
return true;
}
@@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
@@ -187,10 +169,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a3feb5..c0e3a23ac23a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
return true;
}
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* print private data for conntrack */
static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
@@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
(ct->proto.gre.timeout / HZ),
(ct->proto.gre.stream_timeout / HZ));
}
+#endif
static unsigned int *gre_get_timeouts(struct net *net)
{
@@ -364,11 +357,11 @@ static int gre_init_net(struct net *net, u_int16_t proto)
static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
- .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
+#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d2eec4..890b5c73368d 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
}
+#endif
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
@@ -791,11 +784,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
@@ -828,11 +821,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7dfd83e..33c52d9ab2f5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
+#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
@@ -1556,11 +1549,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
@@ -1594,11 +1587,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6178ca..dcf3030d2226 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@@ -313,11 +304,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -347,11 +336,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -381,11 +368,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -415,11 +400,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d38af4274335..4dbb5bad4363 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
tuple.dst.u3 = *daddr;
tuple.dst.u.udp.port = port;
- rcu_read_lock();
do {
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
@@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
goto err1;
}
- if (skip_expect) {
- rcu_read_unlock();
+ if (skip_expect)
return NF_ACCEPT;
- }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
@@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
err2:
nf_ct_expect_put(rtp_exp);
err1:
- rcu_read_unlock();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9043e0..9eb85858d764 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
- l3proto->print_tuple(s, tuple);
- l4proto->print_tuple(s, tuple);
+ switch (l3proto->l3proto) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+ break;
+ default:
+ break;
+ }
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_TCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+ break;
+ case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_UDP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+
+ break;
+ case IPPROTO_DCCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.dccp.port),
+ ntohs(tuple->dst.u.dccp.port));
+ break;
+ case IPPROTO_SCTP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+ break;
+ case IPPROTO_ICMPV6:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_GRE:
+ seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+ break;
+ default:
+ break;
+ }
}
EXPORT_SYMBOL_GPL(print_tuple);
@@ -198,6 +252,31 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+static const char* l3proto_name(u16 proto)
+{
+ switch (proto) {
+ case AF_INET: return "ipv4";
+ case AF_INET6: return "ipv6";
+ }
+
+ return "unknown";
+}
+
+static const char* l4proto_name(u16 proto)
+{
+ switch (proto) {
+ case IPPROTO_ICMP: return "icmp";
+ case IPPROTO_TCP: return "tcp";
+ case IPPROTO_UDP: return "udp";
+ case IPPROTO_DCCP: return "dccp";
+ case IPPROTO_GRE: return "gre";
+ case IPPROTO_SCTP: return "sctp";
+ case IPPROTO_UDPLITE: return "udplite";
+ }
+
+ return "unknown";
+}
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -231,8 +310,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u %ld ",
- l3proto->name, nf_ct_l3num(ct),
- l4proto->name, nf_ct_protonum(ct),
+ l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
@@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
static int
nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_default_on",
- .data = &nf_conntrack_default_on,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742da83af..49f87ec093a3 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -5,17 +5,11 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict);
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c9d154..f7e21953b1de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
return count;
}
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- struct nf_hook_entry *hook_entry, unsigned int queuenum)
+ const struct nf_hook_entries *entries,
+ unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
- .hook = hook_entry,
+ .hook_index = index,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -162,18 +164,16 @@ err:
/* Packets leaving via this function must come back through nf_reinject(). */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict)
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict)
{
- struct nf_hook_entry *entry = *entryp;
int ret;
- ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
- *entryp = rcu_dereference(entry->next);
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
return 1;
- }
kfree_skb(skb);
}
@@ -182,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
static unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_entry **entryp)
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
{
- unsigned int verdict;
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
- do {
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
repeat:
- verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
if (verdict != NF_ACCEPT) {
if (verdict != NF_REPEAT)
return verdict;
goto repeat;
}
- *entryp = rcu_dereference((*entryp)->next);
- } while (*entryp);
+ i++;
+ }
+ *index = i;
return NF_ACCEPT;
}
+/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_hook_entry *hook_entry = entry->hook;
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
+ const struct net *net;
+ unsigned int i;
int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
nf_queue_entry_release_refs(entry);
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ kfree(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- hook_entry = rcu_dereference(hook_entry->next);
- if (hook_entry)
next_hook:
- verdict = nf_iterate(skb, &entry->state, &hook_entry);
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
-okfn:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, &entry->state, &hook_entry, verdict);
- if (err == 1) {
- if (hook_entry)
- goto next_hook;
- goto okfn;
- }
+ err = nf_queue(skb, &entry->state, hooks, i, verdict);
+ if (err == 1)
+ goto next_hook;
break;
case NF_STOLEN:
break;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e58b362..d2a9e6b5d01f 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -33,7 +33,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..149785ff1c7b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table == NULL)
goto err2;
- nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+ table->name = nla_strdup(name, GFP_KERNEL);
+ if (table->name == NULL)
+ goto err3;
+
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err4:
+ kfree(table->name);
err3:
kfree(table);
err2:
@@ -865,6 +870,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
+ kfree(ctx->table->name);
kfree(ctx->table);
module_put(ctx->afi->owner);
}
@@ -1240,10 +1246,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
module_put(basechain->type->owner);
free_percpu(basechain->stats);
+ if (basechain->stats)
+ static_branch_dec(&nft_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
+ kfree(chain->name);
kfree(basechain);
} else {
+ kfree(chain->name);
kfree(chain);
}
}
@@ -1468,8 +1478,13 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
nft_trans_chain_policy(trans) = -1;
if (nla[NFTA_CHAIN_HANDLE] && name) {
- nla_strlcpy(nft_trans_chain_name(trans), name,
- NFT_CHAIN_MAXNAMELEN);
+ nft_trans_chain_name(trans) =
+ nla_strdup(name, GFP_KERNEL);
+ if (!nft_trans_chain_name(trans)) {
+ kfree(trans);
+ free_percpu(stats);
+ return -ENOMEM;
+ }
}
list_add_tail(&trans->list, &net->nft.commit_list);
return 0;
@@ -1504,14 +1519,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return PTR_ERR(stats);
}
basechain->stats = stats;
- } else {
- stats = netdev_alloc_pcpu_stats(struct nft_stats);
- if (stats == NULL) {
- nft_chain_release_hook(&hook);
- kfree(basechain);
- return -ENOMEM;
- }
- rcu_assign_pointer(basechain->stats, stats);
+ static_branch_inc(&nft_counters_enabled);
}
hookfn = hook.type->hooks[hook.num];
@@ -1543,7 +1551,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ chain->name = nla_strdup(name, GFP_KERNEL);
+ if (!chain->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
@@ -1977,8 +1989,8 @@ err:
}
struct nft_rule_dump_ctx {
- char table[NFT_TABLE_MAXNAMELEN];
- char chain[NFT_CHAIN_MAXNAMELEN];
+ char *table;
+ char *chain;
};
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2002,7 +2014,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table[0] &&
+ if (ctx && ctx->table &&
strcmp(ctx->table, table->name) != 0)
continue;
@@ -2042,7 +2054,13 @@ done:
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_rule_dump_ctx *ctx = cb->data;
+
+ if (ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
return 0;
}
@@ -2074,12 +2092,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (!ctx)
return -ENOMEM;
- if (nla[NFTA_RULE_TABLE])
- nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
- sizeof(ctx->table));
- if (nla[NFTA_RULE_CHAIN])
- nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
- sizeof(ctx->chain));
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_KERNEL);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_KERNEL);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
c.data = ctx;
}
@@ -2621,7 +2650,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+ p = strchr(name, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2652,7 +2681,10 @@ cont:
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, min + n);
+ set->name = kasprintf(GFP_KERNEL, name, min + n);
+ if (!set->name)
+ return -ENOMEM;
+
list_for_each_entry(i, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
@@ -2929,7 +2961,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
unsigned int size;
bool create;
u64 timeout;
@@ -3075,8 +3107,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
goto err1;
}
- nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
if (err < 0)
goto err2;
@@ -3126,6 +3164,7 @@ static void nft_set_destroy(struct nft_set *set)
{
set->ops->destroy(set);
module_put(set->ops->type->owner);
+ kfree(set->name);
kvfree(set);
}
@@ -4363,15 +4402,21 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
- nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+ obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ if (!obj->name) {
+ err = -ENOMEM;
+ goto err2;
+ }
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
+err3:
+ kfree(obj->name);
err2:
if (obj->type->destroy)
obj->type->destroy(obj);
@@ -4415,7 +4460,7 @@ nla_put_failure:
}
struct nft_obj_filter {
- char table[NFT_OBJ_MAXNAMELEN];
+ char *table;
u32 type;
};
@@ -4480,7 +4525,10 @@ done:
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_obj_filter *filter = cb->data;
+
+ kfree(filter->table);
+ kfree(filter);
return 0;
}
@@ -4494,9 +4542,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
if (!filter)
return ERR_PTR(-ENOMEM);
- if (nla[NFTA_OBJ_TABLE])
- nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
- NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
if (nla[NFTA_OBJ_TYPE])
filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
@@ -4580,6 +4632,7 @@ static void nft_obj_destroy(struct nft_object *obj)
obj->type->destroy(obj);
module_put(obj->type->owner);
+ kfree(obj->name);
kfree(obj);
}
@@ -4662,6 +4715,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
+ char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4673,7 +4727,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4842,7 +4898,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)[0])
+ if (nft_trans_chain_name(trans))
strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..dfd0bf3810d2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_RULE] = "rule",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
return true;
}
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_stats *stats;
+
+ local_bh_disable();
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
+}
+
struct nft_jumpstack {
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
nft_trace_packet(&info, basechain, NULL, -1,
NFT_TRACETYPE_POLICY);
- rcu_read_lock_bh();
- stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
- u64_stats_update_begin(&stats->syncp);
- stats->pkts++;
- stats->bytes += pkt->skb->len;
- u64_stats_update_end(&stats->syncp);
- rcu_read_unlock_bh();
+ if (static_branch_unlikely(&nft_counters_enabled))
+ nft_update_chain_stats(basechain, pkt);
return nft_base_chain(basechain)->policy;
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7a5793..e1dc527a493b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
NFTA_TRACE_PAD);
}
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+ switch (info->type) {
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (info->verdict->code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
@@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info)
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
- nla_total_size(NFT_TABLE_MAXNAMELEN) +
- nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(strlen(info->chain->table->name)) +
+ nla_total_size(strlen(info->chain->name)) +
nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
- nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
nla_total_size(sizeof(u32)) + /* id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
+ if (nft_trace_have_verdict_chain(info))
+ size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
return;
@@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
- if (info->chain) {
- if (nla_put_string(skb, NFTA_TRACE_CHAIN,
- info->chain->name))
- goto nla_put_failure;
- if (nla_put_string(skb, NFTA_TRACE_TABLE,
- info->chain->table->name))
- goto nla_put_failure;
- }
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+ goto nla_put_failure;
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 400e9ae97153..32b1c0b44e79 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+ const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
int ret = 0;
@@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
{
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
char *name;
int ret;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+ const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
+ const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
- unsigned int *timeouts;
int ret;
if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -401,7 +402,7 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
- __u16 l3num;
- __u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct sk_buff *skb2;
int ret, err;
+ __u16 l3num;
+ __u8 l4num;
if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
return -EINVAL;
@@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
- rcu_read_lock();
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
break;
}
err:
- rcu_read_unlock();
return matching;
}
@@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
{
struct ctnl_timeout *cur, *tmp;
+ nf_ct_unconfirmed_destroy(net);
ctnl_untimeout(net, NULL);
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba95dbb4..cad6498f10b0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ nla_put_failure:
return -1;
}
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 16fa04086880..c9796629858f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
#include "../bridge/br_private.h"
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define NFQNL_QMAX_DEFAULT 1024
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -612,6 +616,18 @@ nlmsg_failure:
return NULL;
}
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+ const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+ if (ct && ((ct->status & flags) == IPS_DYING))
+ return true;
+#endif
+ return false;
+}
+
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);
+ if (nf_ct_drop_unconfirmed(entry))
+ goto err_out_free_nskb;
+
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
@@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
unsigned int instances = 0;
int i;
- rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
instances++;
}
}
- rcu_read_unlock();
return instances;
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe5845f..a0a93d987a3b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -23,6 +24,7 @@ struct nft_exthdr {
u8 len;
u8 op;
enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
u8 flags;
};
@@ -61,6 +63,26 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+ unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+ struct tcphdr *tcph;
+
+ if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ return NULL;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+ if (!tcph)
+ return NULL;
+
+ *tcphdr_len = __tcp_hdrlen(tcph);
+ if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+ return NULL;
+
+ return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+}
+
static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct tcphdr *tcph;
u8 *opt;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
- if (!tcph)
- goto err;
-
- tcphdr_len = __tcp_hdrlen(tcph);
- if (tcphdr_len < sizeof(*tcph))
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
goto err;
@@ -115,6 +126,88 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ struct tcphdr *tcph;
+ u8 *opt;
+ u32 src;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ return;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ union {
+ u8 octet;
+ __be16 v16;
+ __be32 v32;
+ } old, new;
+
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ return;
+
+ if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ return;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ &tcphdr_len);
+ if (!tcph)
+ return;
+
+ src = regs->data[priv->sreg];
+ offset = i + priv->offset;
+
+ switch (priv->len) {
+ case 2:
+ old.v16 = get_unaligned((u16 *)(opt + offset));
+ new.v16 = src;
+
+ switch (priv->type) {
+ case TCPOPT_MSS:
+ /* increase can cause connection to stall */
+ if (ntohs(old.v16) <= ntohs(new.v16))
+ return;
+ break;
+ }
+
+ if (old.v16 == new.v16)
+ return;
+
+ put_unaligned(new.v16, (u16*)(opt + offset));
+ inet_proto_csum_replace2(&tcph->check, pkt->skb,
+ old.v16, new.v16, false);
+ break;
+ case 4:
+ new.v32 = src;
+ old.v32 = get_unaligned((u32 *)(opt + offset));
+
+ if (old.v32 == new.v32)
+ return;
+
+ put_unaligned(new.v32, (u32*)(opt + offset));
+ inet_proto_csum_replace4(&tcph->check, pkt->skb,
+ old.v32, new.v32, false);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return;
+ }
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, priv->len);
}
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_exthdr *priv = nft_expr_priv(expr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
- if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
- goto nla_put_failure;
+ if (!tb[NFTA_EXTHDR_SREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ if (offset < 2)
+ return -EOPNOTSUPP;
+
+ switch (len) {
+ case 2: break;
+ case 4: break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -193,6 +331,26 @@ nla_put_failure:
return -1;
}
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
@@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.dump = nft_exthdr_dump,
};
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_set_eval,
+ .init = nft_exthdr_tcp_set_init,
+ .dump = nft_exthdr_dump_set,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_EXTHDR_OP])
return &nft_exthdr_ipv6_ops;
- op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+ return ERR_PTR(-EOPNOTSUPP);
+
+ op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
- return &nft_exthdr_tcp_ops;
+ if (tb[NFTA_EXTHDR_SREG])
+ return &nft_exthdr_tcp_set_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_tcp_ops;
+ break;
case NFT_EXTHDR_OP_IPV6:
- return &nft_exthdr_ipv6_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv6_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 000000000000..3997ee36cfbd
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (ntohs(pkt->skb->protocol)) {
+ case ETH_P_IP:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case ETH_P_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+ .type = &nft_fib_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_netdev_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fib",
+ .ops = &nft_fib_netdev_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bbd45b0..e110b0ebbf58 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
if (!uh)
return false;
- return uh->check;
+ return (__force bool)uh->check;
}
static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8f88d0..a6b7d05aeacf 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,43 @@ struct nft_rt {
enum nft_registers dreg:8;
};
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+ u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+ const struct sk_buff *skb = pkt->skb;
+ const struct nf_afinfo *ai;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+ minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+ minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ break;
+ }
+
+ ai = nf_get_afinfo(nft_pf(pkt));
+ if (ai) {
+ struct dst_entry *dst = NULL;
+
+ ai->route(nft_net(pkt), &dst, &fl, false);
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
+ }
+ }
+
+ if (mtu <= minlen || mtu > 0xffff)
+ return TCP_MSS_DEFAULT;
+
+ return mtu - minlen;
+}
+
static void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = rt_nexthop((const struct rtable *)dst,
- ip_hdr(skb)->daddr);
+ *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
+ case NFT_RT_TCPMSS:
+ nft_reg_store16(dest, get_tcpmss(pkt, dst));
+ break;
default:
WARN_ON(1);
goto err;
@@ -67,7 +107,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG] = { .type = NLA_U32 },
[NFTA_RT_KEY] = { .type = NLA_U32 },
};
@@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_NEXTHOP6:
len = sizeof(struct in6_addr);
break;
+ case NFT_RT_TCPMSS:
+ len = sizeof(u16);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -118,6 +161,29 @@ nla_put_failure:
return -1;
}
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->key) {
+ case NFT_RT_NEXTHOP4:
+ case NFT_RT_NEXTHOP6:
+ case NFT_RT_CLASSID:
+ return 0;
+ case NFT_RT_TCPMSS:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
static struct nft_expr_type nft_rt_type;
static const struct nft_expr_ops nft_rt_get_ops = {
.type = &nft_rt_type,
@@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.eval = nft_rt_get_eval,
.init = nft_rt_get_init,
.dump = nft_rt_get_dump,
+ .validate = nft_rt_validate,
};
static struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382f1d49..d83a4ec5900d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
#include <net/netfilter/nf_tables.h>
struct nft_rbtree {
- rwlock_t lock;
struct rb_root root;
+ rwlock_t lock;
+ seqcount_t count;
};
struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const void *this;
int d;
- read_lock_bh(&priv->lock);
- parent = priv->root.rb_node;
+ parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
this = nft_set_ext_key(&rbe->ext);
d = memcmp(this, key, set->klen);
if (d < 0) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
continue;
interval = rbe;
} else if (d > 0)
- parent = parent->rb_right;
+ parent = rcu_dereference_raw(parent->rb_right);
else {
if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
continue;
}
if (nft_rbtree_interval_end(rbe))
goto out;
- read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -84,15 +87,32 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- read_unlock_bh(&priv->lock);
return false;
}
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ bool ret;
+
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return ret;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ read_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -130,7 +150,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
- rb_link_node(&new->node, parent, p);
+ rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
}
@@ -144,7 +164,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
int err;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
return err;
@@ -158,7 +180,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
}
@@ -264,6 +288,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
+ seqcount_init(&priv->count);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e1648238a9c9..c83a3b5e1c6c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1192,16 +1192,10 @@ xt_replace_table(struct xt_table *table,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- table->name, table->af,
- private->number);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ table->name, table->af, private->number);
}
#endif
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37de886..5a152e2acfd5 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
u8 proto;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c64aca611ac5..9dae4d665965 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ade4c10c28c6..17d7705e3bd4 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return user_laddr;
laddr = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
@@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
return user_laddr;
laddr = NULL;
- rcu_read_lock();
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
@@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
}
read_unlock_bh(&indev->lock);
}
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e329dabde35f..3b2be2ae6987 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- rcu_read_lock();
-
afinfo = nf_get_afinfo(NFPROTO_IPV6);
if (afinfo != NULL) {
const struct nf_ipv6_ops *v6ops;
@@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
} else {
route_err = 1;
}
- rcu_read_unlock();
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b8fd4ab762ed..ffa8eec980e9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -58,8 +58,7 @@ struct xt_connlimit_rb {
static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
struct xt_connlimit_data {
- struct rb_root climit_root4[CONNLIMIT_SLOTS];
- struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ struct rb_root climit_root[CONNLIMIT_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
@@ -144,7 +143,6 @@ static unsigned int check_hlist(struct net *net,
unsigned int length = 0;
*addit = true;
- rcu_read_lock();
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
@@ -179,8 +177,6 @@ static unsigned int check_hlist(struct net *net,
length++;
}
- rcu_read_unlock();
-
return length;
}
@@ -297,13 +293,11 @@ static int count_them(struct net *net,
int count;
u32 hash;
- if (family == NFPROTO_IPV6) {
+ if (family == NFPROTO_IPV6)
hash = connlimit_iphash6(addr, mask);
- root = &data->climit_root6[hash];
- } else {
+ else
hash = connlimit_iphash(addr->ip & mask->ip);
- root = &data->climit_root4[hash];
- }
+ root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -382,10 +376,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- info->data->climit_root4[i] = RB_ROOT;
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- info->data->climit_root6[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ info->data->climit_root[i] = RB_ROOT;
return 0;
}
@@ -416,10 +408,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- destroy_tree(&info->data->climit_root4[i]);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- destroy_tree(&info->data->climit_root6[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ destroy_tree(&info->data->climit_root[i]);
kfree(info->data);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e1874f28b..ffdb611e54a2 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -659,12 +659,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
- rcu_read_lock_bh();
+ local_bh_disable();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
- rcu_read_unlock_bh();
+ local_bh_enable();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
@@ -689,12 +689,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
/* below the limit */
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
/* default match is underlimit - so over the limit, we need to invert */
return cfg->mode & XT_HASHLIMIT_INVERT;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 71cfa9551d08..36e14b1f061d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
sizeof(struct tcphdr), optsize, opts);
}
- rcu_read_lock();
list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
int foptsize, optnum;
@@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
info->loglevel == XT_OSF_LOGLEVEL_FIRST)
break;
}
- rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 30d632509f82..d558e882ca0c 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -579,8 +579,8 @@ static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 l3num, struct sk_buff *skb, bool natted)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 33fd061305c4..2f2e1338cd3d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6530,7 +6530,7 @@ security_initcall(selinux_init);
#if defined(CONFIG_NETFILTER)
-static struct nf_hook_ops selinux_nf_ops[] = {
+static const struct nf_hook_ops selinux_nf_ops[] = {
{
.hook = selinux_ipv4_postroute,
.pf = NFPROTO_IPV4,
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index cdeb0f3243dd..e36d17835d4f 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -58,7 +58,7 @@ static unsigned int smack_ipv4_output(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops smack_nf_ops[] = {
+static const struct nf_hook_ops smack_nf_ops[] = {
{
.hook = smack_ipv4_output,
.pf = NFPROTO_IPV4,