aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/netfilter/ipset/ip_set.h60
-rw-r--r--include/linux/netfilter_bridge.h50
-rw-r--r--include/linux/skbuff.h14
-rw-r--r--include/net/ip_vs.h15
-rw-r--r--include/net/neighbour.h2
-rw-r--r--include/net/netfilter/ipv4/nf_reject.h2
-rw-r--r--include/net/netfilter/ipv6/nf_reject.h2
-rw-r--r--include/net/netfilter/nf_nat.h10
-rw-r--r--include/net/netfilter/nf_tables.h2
-rw-r--r--include/uapi/linux/ip_vs.h3
-rw-r--r--include/uapi/linux/netfilter/ipset/ip_set.h12
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h17
-rw-r--r--include/uapi/linux/netfilter/xt_set.h10
-rw-r--r--net/Kconfig7
-rw-r--r--net/bridge/Makefile5
-rw-r--r--net/bridge/br.c14
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_input.c1
-rw-r--r--net/bridge/br_netfilter.c116
-rw-r--r--net/bridge/br_netlink.c2
-rw-r--r--net/bridge/br_nf_core.c96
-rw-r--r--net/bridge/br_private.h12
-rw-r--r--net/bridge/br_sysfs_br.c4
-rw-r--r--net/ipv4/netfilter/Kconfig46
-rw-r--r--net/ipv6/netfilter/Kconfig50
-rw-r--r--net/netfilter/ipset/Kconfig9
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c11
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c11
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c11
-rw-r--r--net/netfilter/ipset/ip_set_core.c30
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h16
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c173
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c16
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c19
-rw-r--r--net/netfilter/ipvs/Kconfig10
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c74
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c112
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_fo.c79
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c388
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c26
-rw-r--r--net/netfilter/nf_tables_api.c146
-rw-r--r--net/netfilter/nfnetlink.c6
-rw-r--r--net/netfilter/x_tables.c30
-rw-r--r--net/netfilter/xt_set.c155
72 files changed, 1595 insertions, 512 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 96afc29184be..f1606fa6132d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -57,6 +57,8 @@ enum ip_set_extension {
IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
IPSET_EXT_BIT_COMMENT = 2,
IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
+ IPSET_EXT_BIT_SKBINFO = 3,
+ IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
/* Mark set with an extension which needs to call destroy */
IPSET_EXT_BIT_DESTROY = 7,
IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -65,12 +67,14 @@ enum ip_set_extension {
#define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_SKBINFO(s) ((s)->extensions & IPSET_EXT_SKBINFO)
#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
/* Extension id, in size order */
enum ip_set_ext_id {
IPSET_EXT_ID_COUNTER = 0,
IPSET_EXT_ID_TIMEOUT,
+ IPSET_EXT_ID_SKBINFO,
IPSET_EXT_ID_COMMENT,
IPSET_EXT_ID_MAX,
};
@@ -92,6 +96,10 @@ struct ip_set_ext {
u64 packets;
u64 bytes;
u32 timeout;
+ u32 skbmark;
+ u32 skbmarkmask;
+ u32 skbprio;
+ u16 skbqueue;
char *comment;
};
@@ -104,6 +112,13 @@ struct ip_set_comment {
char *str;
};
+struct ip_set_skbinfo {
+ u32 skbmark;
+ u32 skbmarkmask;
+ u32 skbprio;
+ u16 skbqueue;
+};
+
struct ip_set;
#define ext_timeout(e, s) \
@@ -112,7 +127,8 @@ struct ip_set;
(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
#define ext_comment(e, s) \
(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
-
+#define ext_skbinfo(e, s) \
+(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
const struct ip_set_ext *ext,
@@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_SKBINFO(set))
+ cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
if (SET_WITH_FORCEADD(set))
cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
@@ -304,6 +322,43 @@ ip_set_update_counter(struct ip_set_counter *counter,
}
}
+/*
+ * Copy the four skbinfo fields (mark, mark mask, priority, queue)
+ * from @skbinfo into @mext.
+ *
+ * @ext and @flags are part of the signature but are not read here.
+ */
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+		   const struct ip_set_ext *ext,
+		   struct ip_set_ext *mext, u32 flags)
+{
+	mext->skbqueue = skbinfo->skbqueue;
+	mext->skbprio = skbinfo->skbprio;
+	mext->skbmarkmask = skbinfo->skbmarkmask;
+	mext->skbmark = skbinfo->skbmark;
+}
+
+/*
+ * Append the skbinfo values of an element to the netlink message @skb.
+ *
+ * Only nonzero parameters are sent: the mark/mask pair is packed into
+ * one 64-bit attribute (mark in the upper 32 bits, mask in the lower),
+ * priority and queue go out as separate attributes.
+ *
+ * Returns true as soon as one attribute could not be added, false if
+ * everything that had to be sent was added.
+ */
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+{
+	if (skbinfo->skbmark || skbinfo->skbmarkmask) {
+		u64 markpair = (u64)skbinfo->skbmark << 32 |
+			       skbinfo->skbmarkmask;
+
+		if (nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+				  cpu_to_be64(markpair)))
+			return true;
+	}
+
+	if (skbinfo->skbprio &&
+	    nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			  cpu_to_be32(skbinfo->skbprio)))
+		return true;
+
+	if (skbinfo->skbqueue &&
+	    nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			  cpu_to_be16(skbinfo->skbqueue)))
+		return true;
+
+	return false;
+}
+
+/*
+ * Initialise the skbinfo extension area @skbinfo from the values
+ * carried in @ext (mark, mark mask, priority, queue).
+ */
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+		    const struct ip_set_ext *ext)
+{
+	skbinfo->skbqueue = ext->skbqueue;
+	skbinfo->skbprio = ext->skbprio;
+	skbinfo->skbmarkmask = ext->skbmarkmask;
+	skbinfo->skbmark = ext->skbmark;
+}
+
static inline bool
ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
{
@@ -497,6 +552,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
if (SET_WITH_COMMENT(set) &&
ip_set_put_comment(skb, ext_comment(e, set)))
return -EMSGSIZE;
+ if (SET_WITH_SKBINFO(set) &&
+ ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+ return -EMSGSIZE;
return 0;
}
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 8ab1c278b66d..c755e4971fa3 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -15,7 +15,7 @@ enum nf_br_hook_priorities {
NF_BR_PRI_LAST = INT_MAX,
};
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#define BRNF_PKT_TYPE 0x01
#define BRNF_BRIDGED_DNAT 0x02
@@ -24,16 +24,6 @@ enum nf_br_hook_priorities {
#define BRNF_8021Q 0x10
#define BRNF_PPPoE 0x20
-/* Only used in br_forward.c */
-int nf_bridge_copy_header(struct sk_buff *skb);
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
- return nf_bridge_copy_header(skb);
- return 0;
-}
-
static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
switch (skb->protocol) {
@@ -46,6 +36,44 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
}
}
+/*
+ * Set skb->protocol according to the encapsulation bits in the
+ * nf_bridge mask: BRNF_8021Q is checked first, then BRNF_PPPoE.
+ * With neither bit set the protocol is left untouched.
+ *
+ * skb->nf_bridge is dereferenced without a NULL check.
+ */
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	const unsigned int mask = skb->nf_bridge->mask;
+
+	if (mask & BRNF_8021Q) {
+		skb->protocol = htons(ETH_P_8021Q);
+		return;
+	}
+
+	if (mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
+ *
+ * Only used in br_forward.c
+ *
+ * Returns 0 on success, or the error reported by skb_cow_head().
+ */
+static inline int nf_bridge_copy_header(struct sk_buff *skb)
+{
+	unsigned int encap_len;
+	unsigned int hdr_len;
+	int rc;
+
+	nf_bridge_update_protocol(skb);
+
+	/* The encap length depends on skb->protocol, so it has to be
+	 * read after the protocol update above.
+	 */
+	encap_len = nf_bridge_encap_header_len(skb);
+	hdr_len = ETH_HLEN + encap_len;
+
+	rc = skb_cow_head(skb, hdr_len);
+	if (rc)
+		return rc;
+
+	skb_copy_to_linear_data_offset(skb, -hdr_len,
+				       skb->nf_bridge->data, hdr_len);
+	__skb_push(skb, encap_len);
+	return 0;
+}
+
+/*
+ * Call nf_bridge_copy_header() only when the skb has nf_bridge state
+ * with BRNF_BRIDGED or BRNF_BRIDGED_DNAT set; otherwise return 0
+ * without touching the skb.
+ */
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+{
+	if (!skb->nf_bridge)
+		return 0;
+
+	if (!(skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)))
+		return 0;
+
+	return nf_bridge_copy_header(skb);
+}
+
static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b6cced304b26..262efdbc346b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -156,7 +156,7 @@ struct nf_conntrack {
};
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
atomic_t use;
unsigned int mask;
@@ -534,7 +534,7 @@ struct sk_buff {
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info *nf_bridge;
#endif
unsigned int len,
@@ -556,8 +556,6 @@ struct sk_buff {
/* one bit hole */
kmemcheck_bitfield_end(flags1);
-
-
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
@@ -3016,7 +3014,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -3034,7 +3032,7 @@ static inline void nf_reset(struct sk_buff *skb)
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
#endif
@@ -3057,7 +3055,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
if (copy)
dst->nfctinfo = src->nfctinfo;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dst->nf_bridge = src->nf_bridge;
nf_bridge_get(src->nf_bridge);
#endif
@@ -3072,7 +3070,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(dst->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(dst->nf_bridge);
#endif
__nf_copy(dst, src, true);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 624a8a54806d..576d7f0bed5d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -535,6 +535,7 @@ struct ip_vs_conn {
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
+ __u16 daf; /* Address family of the dest */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif
@@ -648,6 +649,9 @@ struct ip_vs_dest_user_kern {
/* thresholds for active connections */
u32 u_threshold; /* upper threshold */
u32 l_threshold; /* lower threshold */
+
+ /* Address family of addr */
+ u16 af;
};
@@ -986,6 +990,10 @@ struct netns_ipvs {
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */
struct net *net; /* Needed by timer routines */
+ /* Number of heterogeneous destinations, needed because
+ * heterogeneous destinations are not supported when
+ * synchronization is enabled */
+ unsigned int mixed_address_family_dests;
};
#define DEFAULT_SYNC_THRESHOLD 3
@@ -1210,7 +1218,7 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
-struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
+struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark);
@@ -1396,8 +1404,9 @@ void ip_vs_unregister_nl_ioctl(void);
int ip_vs_control_init(void);
void ip_vs_control_cleanup(void);
struct ip_vs_dest *
-ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
- __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport,
+ const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark, __u32 flags);
void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 47f425464f84..f60558d0254c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -373,7 +373,7 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
return 0;
}
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
{
unsigned int seq, hh_alen;
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 931fbf812171..f713b5a31d62 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -98,7 +98,7 @@ static void nf_send_reset(struct sk_buff *oldskb, int hook)
nf_ct_attach(nskb, oldskb);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 710d17ed70b4..7a10cfcd8e33 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -147,7 +147,7 @@ static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
nf_ct_attach(nskb, oldskb);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip6_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index a71dd333ac68..344b1ab19220 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -32,10 +32,8 @@ struct nf_conn_nat {
struct hlist_node bysource;
struct nf_conn *ct;
union nf_conntrack_nat_help help;
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \
- defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
int masq_index;
#endif
};
@@ -68,8 +66,8 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
struct nf_conn_nat *nat,
const struct net_device *out)
{
-#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
nat->masq_index != out->ifindex;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c4d86198d3d6..3d7292392fac 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -241,6 +241,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
* @dtype: data type (verdict or numeric type defined by userspace)
* @size: maximum set size
* @nelems: number of elements
+ * @policy: set parameterization (see enum nft_set_policies)
* @ops: set ops
* @flags: set flags
* @klen: key length
@@ -255,6 +256,7 @@ struct nft_set {
u32 dtype;
u32 size;
u32 nelems;
+ u16 policy;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags;
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index fbcffe8041f7..cabe95d5b461 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -384,6 +384,9 @@ enum {
IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */
IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */
+
+ IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */
+
__IPVS_DEST_ATTR_MAX,
};
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 78c2f2e79920..ca03119111a2 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -115,6 +115,9 @@ enum {
IPSET_ATTR_BYTES,
IPSET_ATTR_PACKETS,
IPSET_ATTR_COMMENT,
+ IPSET_ATTR_SKBMARK,
+ IPSET_ATTR_SKBPRIO,
+ IPSET_ATTR_SKBQUEUE,
__IPSET_ATTR_ADT_MAX,
};
#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1)
@@ -147,6 +150,7 @@ enum ipset_errno {
IPSET_ERR_COUNTER,
IPSET_ERR_COMMENT,
IPSET_ERR_INVALID_MARKMASK,
+ IPSET_ERR_SKBINFO,
/* Type specific error codes */
IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -170,6 +174,12 @@ enum ipset_cmd_flags {
IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+ IPSET_FLAG_BIT_MAP_SKBMARK = 8,
+ IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK),
+ IPSET_FLAG_BIT_MAP_SKBPRIO = 9,
+ IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
+ IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
+ IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
IPSET_FLAG_CMD_MAX = 15,
};
@@ -187,6 +197,8 @@ enum ipset_cadt_flags {
IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
IPSET_FLAG_BIT_WITH_FORCEADD = 5,
IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
+ IPSET_FLAG_BIT_WITH_SKBINFO = 6,
+ IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO),
IPSET_FLAG_CADT_MAX = 15,
};
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index eeec0ae845ef..b72ccfeaf865 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -51,6 +51,8 @@ enum nft_verdicts {
* @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
* @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
* @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
+ * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
*/
enum nf_tables_msg_types {
NFT_MSG_NEWTABLE,
@@ -68,6 +70,8 @@ enum nf_tables_msg_types {
NFT_MSG_NEWSETELEM,
NFT_MSG_GETSETELEM,
NFT_MSG_DELSETELEM,
+ NFT_MSG_NEWGEN,
+ NFT_MSG_GETGEN,
NFT_MSG_MAX,
};
@@ -806,9 +810,22 @@ enum nft_nat_attributes {
* @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
*/
enum nft_masq_attributes {
+ NFTA_MASQ_UNSPEC,
NFTA_MASQ_FLAGS,
__NFTA_MASQ_MAX
};
#define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1)
+/**
+ * enum nft_gen_attributes - nf_tables ruleset generation attributes
+ *
+ * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
+ */
+enum nft_gen_attributes {
+ NFTA_GEN_UNSPEC,
+ NFTA_GEN_ID,
+ __NFTA_GEN_MAX
+};
+#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
+
#endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h
index 964d3d42f874..d6a1df1f2947 100644
--- a/include/uapi/linux/netfilter/xt_set.h
+++ b/include/uapi/linux/netfilter/xt_set.h
@@ -71,4 +71,14 @@ struct xt_set_info_match_v3 {
__u32 flags;
};
+/* Revision 3 target */
+
+struct xt_set_info_target_v3 {
+ struct xt_set_info add_set;
+ struct xt_set_info del_set;
+ struct xt_set_info map_set;
+ __u32 flags;
+ __u32 timeout;
+};
+
#endif /*_XT_SET_H*/
diff --git a/net/Kconfig b/net/Kconfig
index 4051fdfa4367..dc5d700d05e7 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -176,10 +176,11 @@ config NETFILTER_ADVANCED
If unsure, say Y.
config BRIDGE_NETFILTER
- bool "Bridged IP/ARP packets filtering"
- depends on BRIDGE && NETFILTER && INET
+ tristate "Bridged IP/ARP packets filtering"
+ depends on (BRIDGE || BRIDGE=n)
+ depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
- default y
+ default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 8590b942bffa..5e3eac5dc8b9 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -6,11 +6,12 @@ obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o
+
+# br_nf_core.c uses struct net_bridge members and prototypes that only
+# exist when CONFIG_BRIDGE_NETFILTER is y or m, so link it into bridge.o
+# only in those cases (m is mapped to y: the core part is always built
+# into the bridge object, never as a module of its own).
+bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
-bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
+obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1a755a1e5410..44425aff7cba 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -161,7 +161,7 @@ static int __init br_init(void)
if (err)
goto err_out1;
- err = br_netfilter_init();
+ err = br_nf_core_init();
if (err)
goto err_out2;
@@ -179,11 +179,16 @@ static int __init br_init(void)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
+ pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
+ "deprecated. Update your scripts to load br_netfilter if you "
+ "need this.\n");
+
return 0;
+
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
- br_netfilter_fini();
+ br_nf_core_fini();
err_out2:
unregister_pernet_subsys(&br_net_ops);
err_out1:
@@ -196,20 +201,17 @@ err_out:
static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
-
br_netlink_fini();
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
-
unregister_pernet_subsys(&br_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
- br_netfilter_fini();
+ br_nf_core_fini();
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = NULL;
#endif
-
br_fdb_fini();
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 568cccd39a3d..659cac15c0df 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
u16 vid = 0;
rcu_read_lock();
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
br_nf_pre_routing_finish_bridge_slow(skb);
rcu_read_unlock();
@@ -167,7 +167,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
#endif
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 056b67b0e277..992ec49a96aa 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -49,6 +49,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
return 0;
}
+EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct sk_buff *skb)
{
@@ -56,6 +57,7 @@ int br_forward_finish(struct sk_buff *skb)
br_dev_queue_push_xmit);
}
+EXPORT_SYMBOL_GPL(br_forward_finish);
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 366c43649079..6fd5522df696 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -140,6 +140,7 @@ drop:
kfree_skb(skb);
goto out;
}
+EXPORT_SYMBOL_GPL(br_handle_frame_finish);
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct sk_buff *skb)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a615264cf01a..97e43937aaca 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,66 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
-static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
-{
-}
-
-static void fake_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb)
-{
-}
-
-static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
- return NULL;
-}
-
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
-{
- return NULL;
-}
-
-static unsigned int fake_mtu(const struct dst_entry *dst)
-{
- return dst->dev->mtu;
-}
-
-static struct dst_ops fake_dst_ops = {
- .family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
- .update_pmtu = fake_update_pmtu,
- .redirect = fake_redirect,
- .cow_metrics = fake_cow_metrics,
- .neigh_lookup = fake_neigh_lookup,
- .mtu = fake_mtu,
-};
-
-/*
- * Initialize bogus route table used to keep netfilter happy.
- * Currently, we fill in the PMTU entry because netfilter
- * refragmentation needs it, and the rt_flags entry because
- * ipt_REJECT needs it. Future netfilter modules might
- * require us to fill additional fields.
- */
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
- [RTAX_MTU - 1] = 1500,
-};
-
-void br_netfilter_rtable_init(struct net_bridge *br)
-{
- struct rtable *rt = &br->fake_rtable;
-
- atomic_set(&rt->dst.__refcnt, 1);
- rt->dst.dev = br->dev;
- rt->dst.path = &rt->dst;
- dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
- rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
- rt->dst.ops = &fake_dst_ops;
-}
-
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
@@ -245,14 +185,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
skb->nf_bridge->data, header_size);
}
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
- if (skb->nf_bridge->mask & BRNF_8021Q)
- skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
- skb->protocol = htons(ETH_P_PPP_SES);
-}
-
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
@@ -320,26 +252,6 @@ drop:
return -1;
}
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- */
-int nf_bridge_copy_header(struct sk_buff *skb)
-{
- int err;
- unsigned int header_size;
-
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return err;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return 0;
-}
-
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
@@ -1059,38 +971,42 @@ static struct ctl_table brnf_table[] = {
};
#endif
-int __init br_netfilter_init(void)
+static int __init br_netfilter_init(void)
{
int ret;
- ret = dst_entries_init(&fake_dst_ops);
+ ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
if (ret < 0)
return ret;
- ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- if (ret < 0) {
- dst_entries_destroy(&fake_dst_ops);
- return ret;
- }
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
- nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- dst_entries_destroy(&fake_dst_ops);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err1;
}
#endif
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
+err1:
+ nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ return ret;
}
-void br_netfilter_fini(void)
+static void __exit br_netfilter_fini(void)
{
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(brnf_sysctl_header);
#endif
- dst_entries_destroy(&fake_dst_ops);
}
+
+module_init(br_netfilter_init);
+module_exit(br_netfilter_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 90a91e137acc..0fa66b83685f 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -602,7 +602,7 @@ out_af:
return err;
}
-void __exit br_netlink_fini(void)
+void br_netlink_fini(void)
{
br_mdb_uninit();
rtnl_af_unregister(&br_af_ops);
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
new file mode 100644
index 000000000000..387cb3bd017c
--- /dev/null
+++ b/net/bridge/br_nf_core.c
@@ -0,0 +1,96 @@
+/*
+ * Handle firewalling core
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+}
+
+static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+ return NULL;
+}
+
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
+{
+ return NULL;
+}
+
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+ return dst->dev->mtu;
+}
+
+static struct dst_ops fake_dst_ops = {
+ .family = AF_INET,
+ .protocol = cpu_to_be16(ETH_P_IP),
+ .update_pmtu = fake_update_pmtu,
+ .redirect = fake_redirect,
+ .cow_metrics = fake_cow_metrics,
+ .neigh_lookup = fake_neigh_lookup,
+ .mtu = fake_mtu,
+};
+
+/*
+ * Initialize bogus route table used to keep netfilter happy.
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static const u32 br_dst_default_metrics[RTAX_MAX] = {
+ [RTAX_MTU - 1] = 1500,
+};
+
+/*
+ * Set up the dst entry embedded in @br's fake_rtable: one reference,
+ * bound to the bridge device, pointing at itself as path, using the
+ * read-only default metrics and the fake dst ops.
+ */
+void br_netfilter_rtable_init(struct net_bridge *br)
+{
+	struct dst_entry *dst = &br->fake_rtable.dst;
+
+	atomic_set(&dst->__refcnt, 1);
+	dst->dev = br->dev;
+	dst->path = dst;
+	dst_init_metrics(dst, br_dst_default_metrics, true);
+	dst->flags = DST_NOXFRM | DST_FAKE_RTABLE;
+	dst->ops = &fake_dst_ops;
+}
+
+int __init br_nf_core_init(void)
+{
+ return dst_entries_init(&fake_dst_ops);
+}
+
+void br_nf_core_fini(void)
+{
+ dst_entries_destroy(&fake_dst_ops);
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b6c04cbcfdc5..f53592fc3ef9 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -221,7 +221,7 @@ struct net_bridge
struct pcpu_sw_netstats __percpu *stats;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct rtable fake_rtable;
bool nf_call_iptables;
bool nf_call_ip6tables;
@@ -754,13 +754,13 @@ static inline int br_vlan_enabled(struct net_bridge *br)
#endif
/* br_netfilter.c */
-#ifdef CONFIG_BRIDGE_NETFILTER
-int br_netfilter_init(void);
-void br_netfilter_fini(void);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_core_init(void);
+void br_nf_core_fini(void);
void br_netfilter_rtable_init(struct net_bridge *);
#else
-#define br_netfilter_init() (0)
-#define br_netfilter_fini() do { } while (0)
+static inline int br_nf_core_init(void) { return 0; }
+static inline void br_nf_core_fini(void) {}
#define br_netfilter_rtable_init(x)
#endif
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c9e2572b15f4..cb431c6016ee 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -629,7 +629,7 @@ static ssize_t multicast_startup_query_interval_store(
}
static DEVICE_ATTR_RW(multicast_startup_query_interval);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static ssize_t nf_call_iptables_show(
struct device *d, struct device_attribute *attr, char *buf)
{
@@ -763,7 +763,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_multicast_query_response_interval.attr,
&dev_attr_multicast_startup_query_interval.attr,
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
&dev_attr_nf_call_iptables.attr,
&dev_attr_nf_call_ip6tables.attr,
&dev_attr_nf_call_arptables.attr,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d189c5262bdb..345242a79db6 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,16 +61,6 @@ config NFT_CHAIN_ROUTE_IPV4
fields such as the source, destination, type of service and
the packet mark.
-config NFT_CHAIN_NAT_IPV4
- depends on NF_TABLES_IPV4
- depends on NF_NAT_IPV4 && NFT_NAT
- tristate "IPv4 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv4 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
config NFT_REJECT_IPV4
depends on NF_TABLES_IPV4
default NFT_REJECT
@@ -94,6 +84,30 @@ config NF_NAT_IPV4
if NF_NAT_IPV4
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv4 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV4
+ tristate "IPv4 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection).
+
+config NFT_MASQ_IPV4
+ tristate "IPv4 masquerading support for nf_tables"
+ depends on NF_TABLES_IPV4
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV4
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support"
depends on NF_CONNTRACK_SNMP
@@ -232,18 +246,6 @@ config IP_NF_NAT
if IP_NF_NAT
-config NF_NAT_MASQUERADE_IPV4
- tristate "IPv4 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection).
-
-config NFT_MASQ_IPV4
- tristate "IPv4 masquerading support for nf_tables"
- depends on NF_TABLES_IPV4
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV4
-
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NF_NAT_MASQUERADE_IPV4
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a8f25306a46a..bb1a40db7be1 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -40,16 +40,6 @@ config NFT_CHAIN_ROUTE_IPV6
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
-config NFT_CHAIN_NAT_IPV6
- depends on NF_TABLES_IPV6
- depends on NF_NAT_IPV6 && NFT_NAT
- tristate "IPv6 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv6 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
config NFT_REJECT_IPV6
depends on NF_TABLES_IPV6
default NFT_REJECT
@@ -70,6 +60,34 @@ config NF_NAT_IPV6
forms of full Network Address Port Translation. This can be
controlled by iptables or nft.
+if NF_NAT_IPV6
+
+config NFT_CHAIN_NAT_IPV6
+ depends on NF_TABLES_IPV6
+ tristate "IPv6 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv6 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV6
+ tristate "IPv6 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection) for IPv6.
+
+config NFT_MASQ_IPV6
+ tristate "IPv6 masquerade support for nf_tables"
+ depends on NF_TABLES_IPV6
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV6
+ help
+ This is the expression that provides IPv6 masquerading support for
+ nf_tables.
+
+endif # NF_NAT_IPV6
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
@@ -258,18 +276,6 @@ config IP6_NF_NAT
if IP6_NF_NAT
-config NF_NAT_MASQUERADE_IPV6
- tristate "IPv6 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection) for IPv6.
-
-config NFT_MASQ_IPV6
- tristate "IPv6 masquerade support for nf_tables"
- depends on NF_TABLES_IPV6
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV6
-
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NF_NAT_MASQUERADE_IPV6
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 2f7f5c32c6f9..234a8ec82076 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_MAC
+ tristate "hash:mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:mac set type support, by which
+ one can store MAC (ethernet address) elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_NETPORTNET
tristate "hash:net,port,net set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 231f10196cb9..3dbd5e958489 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o
obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f2c7d83dc23f..6f024a8a1534 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -128,6 +128,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
return 0;
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
return 1;
}
@@ -161,6 +163,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_counter(ext_counter(x, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(x, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index dafdb39ef042..55b083ec587a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -139,7 +140,10 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -357,6 +361,9 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index dbad505e79e3..86104744b00f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -240,7 +241,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -394,6 +398,9 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index a4b65ae1986c..005dd36444c3 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -22,7 +22,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -139,7 +140,10 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -291,6 +295,9 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 5593e97426c4..26c795e6b57f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -337,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.len = sizeof(unsigned long),
.align = __alignof__(unsigned long),
},
+ [IPSET_EXT_ID_SKBINFO] = {
+ .type = IPSET_EXT_SKBINFO,
+ .flag = IPSET_FLAG_WITH_SKBINFO,
+ .len = sizeof(struct ip_set_skbinfo),
+ .align = __alignof__(struct ip_set_skbinfo),
+ },
[IPSET_EXT_ID_COMMENT] = {
.type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
.flag = IPSET_FLAG_WITH_COMMENT,
@@ -382,6 +388,7 @@ int
ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
{
+ u64 fullmark;
if (tb[IPSET_ATTR_TIMEOUT]) {
if (!(set->extensions & IPSET_EXT_TIMEOUT))
return -IPSET_ERR_TIMEOUT;
@@ -402,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_COMMENT;
ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
}
-
+ if (tb[IPSET_ATTR_SKBMARK]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+ ext->skbmark = fullmark >> 32;
+ ext->skbmarkmask = fullmark & 0xffffffff;
+ }
+ if (tb[IPSET_ATTR_SKBPRIO]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbprio = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBPRIO]));
+ }
+ if (tb[IPSET_ATTR_SKBQUEUE]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbqueue = be16_to_cpu(nla_get_be16(
+ tb[IPSET_ATTR_SKBQUEUE]));
+ }
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
@@ -1397,7 +1422,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
struct sk_buff *skb2;
struct nlmsgerr *errmsg;
- size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
+ size_t payload = min(SIZE_MAX,
+ sizeof(*errmsg) + nlmsg_len(nlh));
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
struct nlattr *cmdattr;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 8a38890cbe5e..fee7c64e4dd1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -720,6 +720,8 @@ reuse_slot:
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(data, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
out:
rcu_read_unlock_bh();
@@ -797,6 +799,9 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(data, set),
ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
return mtype_do_data_match(data);
}
@@ -1049,8 +1054,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct HTYPE *h;
struct htable *t;
+#ifndef IP_SET_PROTO_UNDEF
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
markmask = 0xffffffff;
@@ -1132,25 +1139,32 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
rcu_assign_pointer(h->table, t);
set->data = h;
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
+#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
+#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
+#endif
IPSET_TOKEN(HTYPE, 4_gc_init)(set,
IPSET_TOKEN(HTYPE, 4_gc));
+#ifndef IP_SET_PROTO_UNDEF
else
IPSET_TOKEN(HTYPE, 6_gc_init)(set,
IPSET_TOKEN(HTYPE, 6_gc));
+#endif
}
-
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
set->name, jhash_size(t->htable_bits),
t->htable_bits, h->maxelem, set->data, t);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e52739938533..76959d79e9d1 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -26,7 +26,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
/* 2 Comments support */
-#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
+/* 3 Forceadd support */
+#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -111,7 +112,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -247,6 +251,9 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -295,6 +302,9 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 4eff0a297254..7abf9788cfa8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -25,7 +25,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+/* 1 Forceadd support */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -113,7 +114,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +248,9 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -301,6 +308,9 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index f37a5ae8a5e0..dcbcceb9a52f 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -28,7 +28,8 @@
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -122,7 +123,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -287,6 +291,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -370,6 +377,9 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 41ef00eda874..7ef93fc887a1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -28,7 +28,8 @@
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -124,7 +125,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -295,6 +299,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -382,6 +389,9 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7308d84f9277..b6012ad92781 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -30,7 +30,8 @@
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -179,7 +180,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -432,6 +436,9 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -541,6 +548,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
new file mode 100644
index 000000000000..65690b52a4d5
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -0,0 +1,173 @@
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:mac");
+
+/* Type specific function prefix */
+#define HTYPE hash_mac
+
+/* Member elements */
+struct hash_mac4_elem {
+ /* Zero valued MAC (ethernet) addresses cannot be stored */
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2]; /* overlay used by kadt/uadt to zero-initialise the element */
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_mac4_data_equal(const struct hash_mac4_elem *e1,
+ const struct hash_mac4_elem *e2,
+ u32 *multi)
+{
+ return ether_addr_equal(e1->ether, e2->ether);
+}
+
+static inline bool
+hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
+{
+ return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+}
+
+static inline void
+hash_mac4_data_next(struct hash_mac4_elem *next,
+ const struct hash_mac4_elem *e)
+{
+}
+
+#define MTYPE hash_mac4
+#define PF 4
+#define HOST_MASK 32
+#define IP_SET_EMIT_CREATE
+#define IP_SET_PROTO_UNDEF
+#include "ip_set_hash_gen.h"
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+static int
+hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ /* MAC can be src only */
+ if (!(opt->flags & IPSET_DIM_ONE_SRC))
+ return 0;
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -EINVAL;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_mac_type __read_mostly = {
+ .name = "hash:mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_mac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_mac_init(void)
+{
+ return ip_set_type_register(&hash_mac_type);
+}
+
+static void __exit
+hash_mac_fini(void)
+{
+ ip_set_type_unregister(&hash_mac_type);
+}
+
+module_init(hash_mac_init);
+module_exit(hash_mac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 4c7d495783a3..6b3ac10ac2f1 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -27,7 +27,8 @@
/* 2 nomatch flag support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -150,7 +151,10 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -318,7 +322,10 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -377,6 +384,9 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index db2606805b35..03cdb69ac9bf 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -28,7 +28,8 @@
/* 2 /0 support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -281,7 +282,10 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -514,7 +518,10 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -590,6 +597,9 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 96b131366e7b..da00284b3571 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -171,7 +172,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -394,7 +398,10 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -462,6 +469,9 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 1c645fbd09c7..c0ddb58d19dc 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -29,7 +29,8 @@
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -172,7 +173,10 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -389,7 +393,10 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -489,6 +496,9 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 2f0034347189..b8053d675fc3 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -26,7 +26,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 0 Comments support added */
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -189,7 +190,10 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -460,7 +464,10 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -569,6 +576,9 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f87adbad6076..f8f682806e36 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -17,7 +17,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support added */
+/* 2 Comments support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -73,6 +74,10 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
ip_set_update_counter(ext_counter(e, set),
ext, &opt->ext,
cmdflags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(e, set),
+ ext, &opt->ext,
+ cmdflags);
return ret;
}
}
@@ -197,6 +202,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
return 0;
}
@@ -307,6 +314,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Set is already added to the list */
ip_set_put_byindex(map->net, d->id);
return 0;
@@ -378,7 +387,10 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -667,6 +679,9 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 0c3b1670b0d1..3b6929dec748 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -152,6 +152,16 @@ config IP_VS_WLC
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_FO
+ tristate "weighted failover scheduling"
+ ---help---
+ The weighted failover scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 34ee602ddb66..38b2723b2e3d 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 610e19c0e13f..b0f7b626b56d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/in.h>
+#include <linux/inet.h>
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -77,6 +78,13 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
+/* Size of the buffer used to format a destination address as text; must build with or without IPv6 support */
+#ifdef CONFIG_IP_VS_IPV6
+#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN /* buffer is filled with "%pI6" when cp->daf is AF_INET6 */
+#else
+#define IP_VS_ADDRSTRLEN (8+1) /* IPv4 is printed with "%08X": 8 hex digits plus the terminating NUL */
+#endif
+
struct ip_vs_aligned_lock
{
spinlock_t l;
@@ -488,7 +496,12 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+#endif
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
@@ -514,7 +527,10 @@ static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
@@ -580,7 +596,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -616,7 +632,13 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
rcu_read_lock();
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+
+ /* This function is only invoked by the synchronization code. We do
+ * not currently support heterogeneous pools with synchronization,
+ * so we can make the assumption that the svc_af is the same as the
+ * dest_af
+ */
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
@@ -671,7 +693,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -740,7 +762,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
- IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
ntohs(ct->dport));
/*
@@ -848,7 +870,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(const struct ip_vs_conn_param *p,
+ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark)
{
@@ -867,6 +889,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
+ cp->daf = dest_af;
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
@@ -874,7 +897,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
&cp->vaddr, p->vaddr);
cp->vport = p->vport;
- ip_vs_addr_set(p->af, &cp->daddr, daddr);
+ ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
@@ -1036,6 +1059,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+ char dbuf[IP_VS_ADDRSTRLEN];
if (!ip_vs_conn_net_eq(cp, net))
return 0;
@@ -1050,24 +1074,32 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
pe_data[len] = '\0';
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%pI6 %04X %-11s %7lu%s\n",
+ "%s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
- " %08X %04X %-11s %7lu%s\n",
+ " %s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
@@ -1105,6 +1137,7 @@ static const char *ip_vs_origin_name(unsigned int flags)
static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
{
+ char dbuf[IP_VS_ADDRSTRLEN];
if (v == SEQ_START_TOKEN)
seq_puts(seq,
@@ -1117,12 +1150,21 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
return 0;
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
- seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
@@ -1130,11 +1172,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
- "%08X %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5c34e8d42e01..990decba1fe4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -328,7 +328,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* This adds param.pe_data to the template,
* and thus param.pe_data will be destroyed
* when the template expires */
- ct = ip_vs_conn_new(&param, &dest->addr, dport,
+ ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
@@ -357,7 +357,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
src_port, &iph->daddr, dst_port, &param);
- cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
+ cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
+ skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
*ignored = -1;
@@ -479,7 +480,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0], &iph->daddr,
pptr[1], &p);
- cp = ip_vs_conn_new(&p, &dest->addr,
+ cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
if (!cp) {
@@ -491,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
- IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
- IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
@@ -550,7 +551,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
- cp = ip_vs_conn_new(&p, &daddr, 0,
+ cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL, skb->mark);
if (!cp)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd2b208ba56c..ac7ba689efe7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -574,8 +574,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
* Called under RCU lock.
*/
static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
@@ -583,9 +583,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* Find the destination for the given service
*/
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
- if ((dest->af == svc->af)
- && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
- && (dest->port == dport)) {
+ if ((dest->af == dest_af) &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+ (dest->port == dport)) {
/* HIT */
return dest;
}
@@ -602,7 +602,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
@@ -613,14 +613,14 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
port = 0;
- dest = ip_vs_lookup_dest(svc, daddr, port);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
if (!dest)
- dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
return dest;
}
@@ -657,8 +657,8 @@ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
@@ -671,11 +671,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- if (dest->af == svc->af &&
- ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+ if (dest->af == dest_af &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
dest->vfwmark == svc->fwmark &&
dest->protocol == svc->protocol &&
@@ -779,6 +779,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_scheduler *sched;
int conn_flags;
+ /* We cannot modify an address and change the address family */
+ BUG_ON(!add && udest->af != dest->af);
+
+ if (add && udest->af != svc->af)
+ ipvs->mixed_address_family_dests++;
+
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
@@ -816,6 +822,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold;
+ dest->af = udest->af;
+
spin_lock_bh(&dest->dst_lock);
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
@@ -847,7 +855,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
EnterFunction(2);
#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6) {
+ if (udest->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
@@ -875,12 +883,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
u64_stats_init(&ip_vs_dest_stats->syncp);
}
- dest->af = svc->af;
+ dest->af = udest->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
- ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
dest->port = udest->port;
atomic_set(&dest->activeconns, 0);
@@ -928,11 +936,11 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest != NULL) {
@@ -944,12 +952,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
* Check if the dest already exists in the trash and
* is from the same service
*/
- dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+ dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
- IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+ IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
@@ -992,11 +1000,11 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest == NULL) {
@@ -1055,6 +1063,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
list_del_rcu(&dest->n_list);
svc->num_dests--;
+ if (dest->af != svc->af)
+ net_ipvs(svc->net)->mixed_address_family_dests--;
+
if (svcupd) {
struct ip_vs_scheduler *sched;
@@ -1078,7 +1089,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
rcu_read_unlock();
if (dest == NULL) {
@@ -2244,6 +2255,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
udest->weight = udest_compat->weight;
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
+ udest->af = AF_INET;
}
static int
@@ -2480,6 +2492,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
if (count >= get->num_dests)
break;
+ /* Cannot expose heterogeneous members via sockopt
+ * interface
+ */
+ if (dest->af != svc->af)
+ continue;
+
entry.addr = dest->addr.ip;
entry.port = dest->port;
entry.conn_flags = atomic_read(&dest->conn_flags);
@@ -2777,6 +2795,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+ [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3032,7 +3051,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
atomic_read(&dest->inactconns)) ||
nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
- atomic_read(&dest->persistconns)))
+ atomic_read(&dest->persistconns)) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
goto nla_put_failure;
@@ -3113,6 +3133,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
struct nlattr *nla_addr, *nla_port;
+ struct nlattr *nla_addr_family;
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
@@ -3121,6 +3142,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
nla_port = attrs[IPVS_DEST_ATTR_PORT];
+ nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
if (!(nla_addr && nla_port))
return -EINVAL;
@@ -3130,6 +3152,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
+ if (nla_addr_family)
+ udest->af = nla_get_u16(nla_addr_family);
+ else
+ udest->af = 0;
+
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
@@ -3234,6 +3261,12 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
+ /* The synchronization protocol is incompatible with mixed family
+ * services
+ */
+ if (net_ipvs(net)->mixed_address_family_dests > 0)
+ return -EINVAL;
+
return start_sync_thread(net,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
@@ -3357,6 +3390,35 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
need_full_dest);
if (ret)
goto out;
+
+ /* Old protocols did not allow the user to specify address
+ * family, so we set it to zero instead. We also didn't
+ * allow heterogeneous pools in the old code, so it's safe
+ * to assume that this will have the same address family as
+ * the service.
+ */
+ if (udest.af == 0)
+ udest.af = svc->af;
+
+ if (udest.af != svc->af) {
+ /* The synchronization protocol is incompatible
+ * with mixed family services
+ */
+ if (net_ipvs(net)->sync_state) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Which connection types do we support? */
+ switch (udest.conn_flags) {
+ case IP_VS_CONN_F_TUNNEL:
+ /* We are able to forward this */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
}
switch (cmd) {
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index c3b84546ea9e..6be5c538b71e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -234,7 +234,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
new file mode 100644
index 000000000000..e09874d02938
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -0,0 +1,79 @@
+/*
+ * IPVS: Weighted Fail Over module
+ *
+ * Authors: Kenny Mathis <kmathis@chokepoint.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Kenny Mathis : added initial functionality based on weight
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* Weighted fail-over scheduling.
+ *
+ * Walks the destinations of @svc and returns the one with the highest
+ * weight among those not flagged IP_VS_DEST_F_OVERLOAD.  The comparison is
+ * strict and starts from zero, so a destination needs a weight above zero
+ * to be chosen and the first of several equally weighted destinations wins.
+ * Returns NULL (after reporting a scheduler error) when nothing qualifies.
+ * @skb and @iph are not consulted.
+ */
+static struct ip_vs_dest *
+ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+		  struct ip_vs_iphdr *iph)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dest *best = NULL;	/* Best candidate seen so far */
+	int best_weight = 0;		/* Weight recorded for that candidate */
+
+	IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n");
+
+	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		if (atomic_read(&dest->weight) <= best_weight)
+			continue;
+
+		best = dest;
+		best_weight = atomic_read(&dest->weight);
+	}
+
+	if (!best) {
+		ip_vs_scheduler_err(svc, "no destination available");
+		return NULL;
+	}
+
+	IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n",
+		      IP_VS_DBG_ADDR(best->af, &best->addr),
+		      ntohs(best->port),
+		      atomic_read(&best->activeconns),
+		      atomic_read(&best->weight));
+	return best;
+}
+
+/* Scheduler descriptor registered under the name "fo". */
+static struct ip_vs_scheduler ip_vs_fo_scheduler = {
+	.name =			"fo",
+	.schedule =		ip_vs_fo_schedule,
+	.module =		THIS_MODULE,
+	.refcnt =		ATOMIC_INIT(0),
+	.n_list =		LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list),
+};
+
+/* Module entry point: register the "fo" scheduler and hand back the
+ * result of the registration unchanged.
+ */
+static int __init ip_vs_fo_init(void)
+{
+	int ret = register_ip_vs_scheduler(&ip_vs_fo_scheduler);
+
+	return ret;
+}
+
+/* Module exit point: unregister the "fo" scheduler, then wait for an RCU
+ * grace period before returning.
+ */
+static void __exit ip_vs_fo_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_fo_scheduler);
+	/* NOTE(review): presumably lets RCU readers that may still be
+	 * running the schedule callback finish before unload - confirm.
+	 */
+	synchronize_rcu();
+}
+
+module_init(ip_vs_fo_init);
+module_exit(ip_vs_fo_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 77c173282f38..a64fa15790e5 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -233,7 +233,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
ip_vs_conn_fill_param(ip_vs_conn_net(cp),
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
- n_cp = ip_vs_conn_new(&p, &from, port,
+ /* As above, this is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
cp->dest, skb->mark);
@@ -396,7 +397,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
- n_cp = ip_vs_conn_new(&p, &cp->daddr,
+ /* This is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr,
htons(ntohs(cp->dport)-1),
IP_VS_CONN_F_NFCT, cp->dest,
skb->mark);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 547ff33c1efd..127f14046c51 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -199,11 +199,11 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
- en = ip_vs_lblc_get(dest->af, tbl, daddr);
+ en = ip_vs_lblc_get(af, tbl, daddr);
if (en) {
if (en->dest == dest)
return en;
@@ -213,8 +213,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
ip_vs_dest_hold(dest);
@@ -521,13 +521,13 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblc_new(tbl, &iph->daddr, dest);
+ ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3f21a2f47de1..2229d2d8bbe0 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -362,18 +362,18 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblcr_entry *en;
- en = ip_vs_lblcr_get(dest->af, tbl, daddr);
+ en = ip_vs_lblcr_get(af, tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
/* initialize its dest set */
@@ -706,13 +706,13 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblcr_new(tbl, &iph->daddr, dest);
+ ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 2bdcb1cf2127..19a0769a989a 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -59,7 +59,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
else
IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
"inactconns %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->inactconns));
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 961a6de9bb29..a8b63401e773 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -107,7 +107,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
out:
IP_VS_DBG_BUF(6, "NQ: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 2f7ea7564044..5b84c0b56642 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -432,7 +432,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index e3a697234a98..8e92beb0cca9 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -510,7 +510,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 176b87c35e34..58bacfc461ee 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -95,7 +95,7 @@ stop:
spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt), atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index e446b9fa7424..f8e2d00f528b 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -108,7 +108,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "SED: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index cc65b2f42cd4..98a13433b68c 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -138,7 +138,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
/* if the original dest is unavailable, loop around the table
* starting from ihash to find a new dest
@@ -153,7 +153,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable "
"server %s:%d (offset %d), reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port), roffset);
}
@@ -192,7 +192,7 @@ ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
RCU_INIT_POINTER(b->dest, dest);
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
- i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
atomic_read(&dest->weight));
/* Don't move to next dest until filling weight */
@@ -342,7 +342,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->saddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index eadffb29dec0..7162c86fd50d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -880,10 +880,17 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* but still handled.
*/
rcu_read_lock();
- dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
- param->vport, protocol, fwmark, flags);
+ /* This function is only invoked by the synchronization
+ * code. We do not currently support heterogeneous pools
+ * with synchronization, so we can make the assumption that
+ * the svc_af is the same as the dest_af
+ */
+ dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ param->vaddr, param->vport, protocol,
+ fwmark, flags);
- cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
+ fwmark);
rcu_read_unlock();
if (!cp) {
kfree(param->pe_data);
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index b5b4650d50a9..6b366fd90554 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -80,7 +80,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "WLC: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 0546cd572d6b..17e6d4406ca7 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -216,7 +216,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
found:
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 56896a412bce..91f17c1eb8a2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -157,18 +157,113 @@ retry:
return rt;
}
+#ifdef CONFIG_IP_VS_IPV6
+/* Returns 1 if the IPv6 route @rt has a device with IFF_LOOPBACK set,
+ * 0 otherwise (including when the route has no device).
+ */
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+	if (!rt->dst.dev)
+		return 0;
+
+	return !!(rt->dst.dev->flags & IFF_LOOPBACK);
+}
+#endif
+
+/* Decide whether sending @skb over a newly selected route is forbidden.
+ *
+ * @skb_af:          address family of the packet held in @skb
+ * @skb:             packet being forwarded; its attached dst is the old route
+ * @rt_mode:         IP_VS_RT_MODE_* flags the caller permits
+ * @new_rt_is_local: true if the newly selected route is a local one
+ *
+ * Returns true when the packet must not be sent:
+ *  - the new route is local but IP_VS_RT_MODE_LOCAL is not permitted;
+ *  - the new route is local, the old route is not, and
+ *    IP_VS_RT_MODE_RDR is not permitted;
+ *  - the new route is non-local but IP_VS_RT_MODE_NON_LOCAL is not
+ *    permitted;
+ *  - the new route is non-local and the packet has a loopback source.
+ * Returns false otherwise.
+ */
+static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+						int rt_mode,
+						bool new_rt_is_local)
+{
+	bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+	/* Non-local routes are gated by their own flag, not by _LOCAL */
+	bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_NON_LOCAL);
+	bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
+	bool source_is_loopback;
+	bool old_rt_is_local;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb_af == AF_INET6) {
+		int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+
+		/* Loopback source only counts when the skb has no device
+		 * or that device is itself a loopback device.
+		 */
+		source_is_loopback =
+			(!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+			(addr_type & IPV6_ADDR_LOOPBACK);
+		old_rt_is_local = __ip_vs_is_local_route6(
+			(struct rt6_info *)skb_dst(skb));
+	} else
+#endif
+	{
+		source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr);
+		old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+	}
+
+	if (unlikely(new_rt_is_local)) {
+		if (!rt_mode_allow_local)
+			return true;
+		if (!rt_mode_allow_redirect && !old_rt_is_local)
+			return true;
+	} else {
+		if (!rt_mode_allow_non_local)
+			return true;
+		if (source_is_loopback)
+			return true;
+	}
+	return false;
+}
+
+/* Pass @mtu to the update_pmtu handler of the route currently attached to
+ * @skb.  Nothing is done unless the skb has no device, has a socket, and
+ * that socket is not in TCP_TIME_WAIT.  @skb_af is not used here.
+ */
+static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
+{
+	struct rtable *ort = skb_rtable(skb);
+	struct sock *sk = skb->sk;
+
+	if (skb->dev)
+		return;
+	if (!sk || sk->sk_state == TCP_TIME_WAIT)
+		return;
+
+	ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+}
+
+/* Check that @skb fits into @mtu on the chosen route.
+ *
+ * @skb_af:  address family of the packet held in @skb
+ * @rt_mode: IP_VS_RT_MODE_* flags of the transmit path
+ * @ipvsh:   parsed header info; only fragoffs is read (IPv6 path)
+ * @skb:     packet to check
+ * @mtu:     MTU to check against
+ *
+ * Returns true if the packet may be sent.  Returns false if it is too big;
+ * in that case an ICMP error has been sent back (for IPv6 only when
+ * ipvsh->fragoffs is zero).
+ */
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+					  struct ip_vs_iphdr *ipvsh,
+					  struct sk_buff *skb, int mtu)
+{
+	struct netns_ipvs *ipvs;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb_af == AF_INET6) {
+		struct net *net = dev_net(skb_dst(skb)->dev);
+
+		if (likely(!__mtu_check_toobig_v6(skb, mtu)))
+			return true;
+
+		/* Give the skb a device before sending the ICMPv6 error */
+		if (!skb->dev)
+			skb->dev = net->loopback_dev;
+		/* only send ICMP too big on first fragment */
+		if (!ipvsh->fragoffs)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		IP_VS_DBG(1, "frag needed for %pI6c\n",
+			  &ipv6_hdr(skb)->saddr);
+		return false;
+	}
+#endif
+
+	ipvs = net_ipvs(skb_net(skb));
+
+	/* If we're going to tunnel the packet and pmtu discovery
+	 * is disabled, we'll just fragment it anyway
+	 */
+	if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+		return true;
+
+	/* Fine unless DF is set, the packet exceeds the MTU and it is
+	 * not a GSO packet.
+	 */
+	if (likely(!(ip_hdr(skb)->frag_off & htons(IP_DF)) ||
+		   skb->len <= mtu || skb_is_gso(skb)))
+		return true;
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+	IP_VS_DBG(1, "frag needed for %pI4\n", &ip_hdr(skb)->saddr);
+	return false;
+}
+
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, int rt_mode, __be32 *ret_saddr)
+__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+ __be32 daddr, int rt_mode, __be32 *ret_saddr,
+ struct ip_vs_iphdr *ipvsh)
{
struct net *net = dev_net(skb_dst(skb)->dev);
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
- struct rtable *ort; /* Original route */
- struct iphdr *iph;
- __be16 df;
int mtu;
int local, noref = 1;
@@ -218,30 +313,14 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
}
local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
- (rt->rt_flags & RTCF_LOCAL) ?
- "local":"non-local", &daddr);
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI4\n", &dest->addr.ip);
goto err_put;
}
- iph = ip_hdr(skb);
- if (likely(!local)) {
- if (unlikely(ipv4_is_loopback(iph->saddr))) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI4 to non-local address, dest: %pI4\n",
- &iph->saddr, &daddr);
- goto err_put;
- }
- } else {
- ort = skb_rtable(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !(ort->rt_flags & RTCF_LOCAL)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
- "local requires NAT method, dest: %pI4\n",
- &iph->daddr, &daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
ip_rt_put(rt);
@@ -250,28 +329,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
- df = iph->frag_off & htons(IP_DF);
} else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
}
- ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
- /* MTU check allowed? */
- df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- /* MTU checking */
- if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -295,12 +363,6 @@ err_unreach:
}
#ifdef CONFIG_IP_VS_IPV6
-
-static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
-{
- return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
-}
-
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
struct in6_addr *ret_saddr, int do_xfrm)
@@ -339,14 +401,13 @@ out_err:
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
- struct rt6_info *ort; /* Original route */
struct dst_entry *dst;
int mtu;
int local, noref = 1;
@@ -393,32 +454,15 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
}
local = __ip_vs_is_local_route6(rt);
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
- local ? "local":"non-local", daddr);
+
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI6\n", &dest->addr.in6);
goto err_put;
}
- if (likely(!local)) {
- if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
- IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI6c to non-local address, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->saddr, daddr);
- goto err_put;
- }
- } else {
- ort = (struct rt6_info *) skb_dst(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !__ip_vs_is_local_route6(ort)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6c "
- "to local requires NAT method, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->daddr, daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
dst_release(&rt->dst);
@@ -429,28 +473,17 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
mtu = dst_mtu(&rt->dst);
else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
goto err_put;
}
- ort = (struct rt6_info *) skb_dst(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
- if (!skb->dev)
- skb->dev = net->loopback_dev;
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -556,8 +589,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
- NULL) < 0)
+ if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
ip_send_check(iph);
@@ -586,7 +619,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
@@ -633,10 +666,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL);
+ IP_VS_RT_MODE_RDR, NULL, ipvsh);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -721,8 +754,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR);
@@ -791,6 +824,81 @@ tx_error:
}
#endif
+/* Prepare @skb for encapsulation and collect the values needed to build
+ * the outer header, so the inner header need not be read again later.
+ *
+ * @skb:           packet to be tunneled
+ * @skb_af:        address family of the packet held in @skb
+ * @max_headroom:  headroom required for the encapsulating header
+ * @next_protocol: out, IPPROTO_IPV6 or IPPROTO_IPIP depending on @skb_af
+ * @payload_len:   out, optional (may be NULL): total length of the inner
+ *                 packet
+ * @dsfield:       out, DS field of the inner header
+ * @ttl:           out, hop limit / TTL of the inner header
+ * @df:            out, optional (may be NULL): DF bit of the inner IPv4
+ *                 header, always 0 for IPv6
+ *
+ * If the skb lacks headroom or is cloned it is replaced by a reallocated
+ * copy and the old one is consumed.  For IPv4 the inner header checksum
+ * is recomputed.
+ *
+ * Returns the skb to continue with, or ERR_PTR(-ENOMEM) if reallocation
+ * failed; in that case the original skb has already been freed.
+ */
+static struct sk_buff *
+ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+			   unsigned int max_headroom, __u8 *next_protocol,
+			   __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
+			   __be16 *df)
+{
+	struct iphdr *inner_iph;
+
+	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
+		struct sk_buff *roomy = skb_realloc_headroom(skb, max_headroom);
+
+		if (!roomy) {
+			kfree_skb(skb);
+			return ERR_PTR(-ENOMEM);
+		}
+		consume_skb(skb);
+		skb = roomy;
+	}
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb_af == AF_INET6) {
+		struct ipv6hdr *inner_ip6h = ipv6_hdr(skb);
+
+		*next_protocol = IPPROTO_IPV6;
+		/* payload_len excludes the fixed IPv6 header; add it back */
+		if (payload_len)
+			*payload_len = ntohs(inner_ip6h->payload_len) +
+				       sizeof(*inner_ip6h);
+		*dsfield = ipv6_get_dsfield(inner_ip6h);
+		*ttl = inner_ip6h->hop_limit;
+		if (df)
+			*df = 0;
+		return skb;
+	}
+#endif
+
+	inner_iph = ip_hdr(skb);
+	/* Copy DF, reset fragment offset and MF */
+	if (df)
+		*df = (inner_iph->frag_off & htons(IP_DF));
+	*next_protocol = IPPROTO_IPIP;
+
+	/* fix old IP header checksum */
+	ip_send_check(inner_iph);
+	*dsfield = ipv4_get_dsfield(inner_iph);
+	*ttl = inner_iph->ttl;
+	if (payload_len)
+		*payload_len = ntohs(inner_iph->tot_len);
+
+	return skb;
+}
+
+/* Select the SKB_GSO_* tunnel type for a packet of family @orig_af
+ * encapsulated in an outer header of family @encaps_af.
+ *
+ * An IPv4 outer header yields SKB_GSO_IPIP for an IPv4 payload and
+ * SKB_GSO_SIT for any other payload.  Any other outer family yields 0.
+ */
+static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
+{
+	/* GSO: we need to provide proper SKB_GSO_ value for IPv6:
+	 * SKB_GSO_SIT/IPV6
+	 */
+	if (encaps_af != AF_INET)
+		return 0;
+
+	return orig_af == AF_INET ? SKB_GSO_IPIP : SKB_GSO_SIT;
+}
/*
* IP Tunneling transmitter
@@ -819,9 +927,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- u8 tos = old_iph->tos;
- __be16 df;
+ __u8 next_protocol = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
+ __be16 df = 0;
+ __be16 *dfp = NULL;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
@@ -829,11 +939,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
- IP_VS_RT_MODE_TUNNEL, &saddr);
+ IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -844,29 +954,21 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt = skb_rtable(skb);
tdev = rt->dst.dev;
- /* Copy DF, reset fragment offset and MF */
- df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
-
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ip_hdr(skb);
- }
-
- /* fix old IP header checksum */
- ip_send_check(old_iph);
+ /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
+ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, NULL, &dsfield,
+ &ttl, dfp);
+ if (IS_ERR(skb))
+ goto tx_error;
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET, cp->af));
if (IS_ERR(skb))
goto tx_error;
@@ -883,11 +985,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
- iph->protocol = IPPROTO_IPIP;
- iph->tos = tos;
+ iph->protocol = next_protocol;
+ iph->tos = dsfield;
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
- iph->ttl = old_iph->ttl;
+ iph->ttl = ttl;
ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
@@ -920,7 +1022,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct ipv6hdr *old_iph = ipv6_hdr(skb);
+ __u8 next_protocol = 0;
+ __u32 payload_len = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
@@ -928,7 +1033,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -948,19 +1053,14 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
-
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ipv6_hdr(skb);
- }
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, &payload_len,
+ &dsfield, &ttl, NULL);
+ if (IS_ERR(skb))
+ goto tx_error;
- /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */
- skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
if (IS_ERR(skb))
goto tx_error;
@@ -975,14 +1075,13 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
iph = ipv6_hdr(skb);
iph->version = 6;
- iph->nexthdr = IPPROTO_IPV6;
- iph->payload_len = old_iph->payload_len;
- be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
+ iph->nexthdr = next_protocol;
+ iph->payload_len = htons(payload_len);
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
- ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
+ ipv6_change_dsfield(iph, 0, dsfield);
iph->daddr = cp->daddr.in6;
iph->saddr = saddr;
- iph->hop_limit = old_iph->hop_limit;
+ iph->hop_limit = ttl;
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
@@ -1021,10 +1120,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL);
+ IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -1060,8 +1159,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL);
if (local < 0)
@@ -1128,7 +1227,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ NULL, iph);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -1219,8 +1319,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d25f29377648..957c1db66652 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -14,6 +14,30 @@
static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+/*
+ * Tell whether the generic tracker should handle protocol number @proto.
+ *
+ * Returns false for SCTP, DCCP, GRE and UDPLITE when the matching
+ * CONFIG_NF_CT_PROTO_*_MODULE symbol is defined at build time; every
+ * other protocol number yields true.
+ */
+static bool nf_generic_should_process(u8 proto)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
+	if (proto == IPPROTO_SCTP)
+		return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
+	if (proto == IPPROTO_DCCP)
+		return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
+	if (proto == IPPROTO_GRE)
+		return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
+	if (proto == IPPROTO_UDPLITE)
+		return false;
+#endif
+	return true;
+}
+
static inline struct nf_generic_net *generic_pernet(struct net *net)
{
return &net->ct.nf_ct_proto.generic;
@@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct,
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
- return true;
+ return nf_generic_should_process(nf_ct_protonum(ct));
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 82374601577e..19e79f0d9ad2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -405,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
-static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table)
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -420,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
@@ -448,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table);
+ err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -488,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_table_info(skb,
+ if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWTABLE,
@@ -540,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
family, table);
if (err < 0)
@@ -914,9 +914,9 @@ nla_put_failure:
return -ENOSPC;
}
-static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table,
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
@@ -930,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
@@ -988,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -1031,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWCHAIN,
NLM_F_MULTI,
@@ -1090,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
family, table, chain);
if (err < 0)
@@ -1647,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
.len = NFT_USERDATA_MAXLEN },
};
-static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
const struct nft_rule *rule)
@@ -1668,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
@@ -1724,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
@@ -1771,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
@@ -1837,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule);
if (err < 0)
@@ -2321,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
@@ -2342,6 +2344,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->policy != NFT_SET_POL_PERFORMANCE) {
+ if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
+ goto nla_put_failure;
+ }
+
desc = nla_nest_start(skb, NFTA_SET_DESC);
if (desc == NULL)
goto nla_put_failure;
@@ -2667,6 +2674,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
+ set->policy = policy;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2925,7 +2933,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
goto nla_put_failure;
@@ -3006,7 +3014,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
@@ -3293,6 +3301,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
return err;
}
+/*
+ * Append an NFT_MSG_NEWGEN netlink message to @skb.
+ *
+ * The message carries an nfgenmsg header (family AF_UNSPEC, res_id set to
+ * the low 16 bits of net->nft.base_seq) followed by one NFTA_GEN_ID
+ * attribute holding the full base_seq in network byte order.
+ *
+ * Returns the value of nlmsg_end() on success.  If the header or the
+ * attribute cannot be added, the message is trimmed and -EMSGSIZE is
+ * returned.
+ */
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+				   u32 portid, u32 seq)
+{
+	const int msg_type = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+	struct nfgenmsg *hdr;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, portid, seq, msg_type, sizeof(*hdr), 0);
+	if (!nlh)
+		goto nla_put_failure;
+
+	hdr = nlmsg_data(nlh);
+	hdr->nfgen_family = AF_UNSPEC;
+	hdr->version = NFNETLINK_V0;
+	hdr->res_id = htons(net->nft.base_seq & 0xffff);
+
+	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) != 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/*
+ * Send a generation notification for the batch carried by @skb.
+ *
+ * A fresh skb is filled by nf_tables_fill_gen_info() using the portid and
+ * sequence number of the request in @skb and handed to nfnetlink_send()
+ * for the NFNLGRP_NFTABLES group, together with the request's report flag.
+ * @event is currently not used by this function.
+ *
+ * Returns 0 when there is nothing to send, otherwise the result of
+ * nfnetlink_send() or a negative errno.  On failure the error is also
+ * recorded through nfnetlink_set_err().
+ */
+static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	struct sk_buff *skb2;
+	int err;
+
+	/*
+	 * Skip the work only when nobody is interested: the sender did not
+	 * ask for a report AND the group has no listeners.  A sender that
+	 * requested a report must still be served when the group is empty.
+	 */
+	if (!nlmsg_report(nlh) &&
+	    !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		goto err;
+
+	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+				      nlh->nlmsg_seq);
+	if (err < 0) {
+		kfree_skb(skb2);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
+			     NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+err:
+	if (err < 0) {
+		nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+				  err);
+	}
+	return err;
+}
+
+/*
+ * NFT_MSG_GETGEN handler: reply to the requester with a generation message.
+ *
+ * A new skb is filled by nf_tables_fill_gen_info() using the portid and
+ * sequence number of the request and unicast back over @nlsk.
+ *
+ * Returns -ENOMEM if the reply cannot be allocated, the negative error from
+ * nf_tables_fill_gen_info() if filling fails (the reply is freed), otherwise
+ * the result of nlmsg_unicast().
+ */
+static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	struct net *net = sock_net(skb->sk);
+	struct sk_buff *reply;
+	int rc;
+
+	reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	rc = nf_tables_fill_gen_info(reply, net, NETLINK_CB(skb).portid,
+				     nlh->nlmsg_seq);
+	if (rc < 0) {
+		kfree_skb(reply);
+		return rc;
+	}
+
+	return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
.call_batch = nf_tables_newtable,
@@ -3369,6 +3458,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_GETGEN] = {
+ .call = nf_tables_getgen,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
@@ -3526,6 +3618,8 @@ static int nf_tables_commit(struct sk_buff *skb)
call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
}
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
return 0;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c138b8fbe280..f77d3f7f22b5 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -333,7 +333,7 @@ replay:
* original skb.
*/
if (err == -EAGAIN) {
- ss->abort(skb);
+ ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
goto replay;
@@ -357,9 +357,9 @@ ack:
}
done:
if (success && done)
- ss->commit(skb);
+ ss->commit(oskb);
else
- ss->abort(skb);
+ ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 272ae4d6fdf4..133eb4772f12 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1101,22 +1101,11 @@ static const struct seq_operations xt_match_seq_ops = {
static int xt_match_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_match_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
@@ -1165,22 +1154,11 @@ static const struct seq_operations xt_target_seq_ops = {
static int xt_target_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_target_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index cb70f6ec5695..5732cd64acc0 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -366,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
#define set_target_v2_checkentry set_target_v1_checkentry
#define set_target_v2_destroy set_target_v1_destroy
+/* Revision 3 target */
+
+/*
+ * SET target, revision 3.
+ *
+ * In order: add the packet to add_set, delete it from del_set, then look it
+ * up in map_set (each step only when the respective index is valid).  When
+ * the map_set lookup matches, the requested skb fields (mark, priority,
+ * queue mapping) are set from map_opt.ext -- presumably populated by
+ * match_set(); confirm against its implementation.
+ *
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_set_info_target_v3 *info = par->targinfo;
+	ADT_OPT(add_opt, par->family, info->add_set.dim,
+		info->add_set.flags, info->flags, info->timeout);
+	ADT_OPT(del_opt, par->family, info->del_set.dim,
+		info->del_set.flags, 0, UINT_MAX);
+	ADT_OPT(map_opt, par->family, info->map_set.dim,
+		info->map_set.flags, 0, UINT_MAX);
+	int matched;
+
+	/* Normalize to fit into jiffies */
+	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+	    add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+		add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_add(info->add_set.index, skb, par, &add_opt);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_del(info->del_set.index, skb, par, &del_opt);
+
+	/* No map set configured: nothing further to do. */
+	if (info->map_set.index == IPSET_INVALID_ID)
+		return XT_CONTINUE;
+
+	map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+					   IPSET_FLAG_MAP_SKBPRIO |
+					   IPSET_FLAG_MAP_SKBQUEUE);
+	matched = match_set(info->map_set.index, skb, par, &map_opt,
+			    info->map_set.flags & IPSET_INV_MATCH);
+	if (!matched)
+		return XT_CONTINUE;
+
+	if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+		skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
+			    ^ (map_opt.ext.skbmark);
+	if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+		skb->priority = map_opt.ext.skbprio;
+	/* Only remap the queue when the device actually has that many. */
+	if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+	    skb->dev &&
+	    skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
+		skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+
+	return XT_CONTINUE;
+}
+
+
+/*
+ * Validate a revision 3 SET target and take a reference on every set it
+ * names (add_set, del_set, map_set; each only if its index is valid).
+ *
+ * Returns 0 on success, or:
+ *   -ENOENT  a set index cannot be resolved,
+ *   -EINVAL  --map-set is used outside the mangle table, or prio/queue
+ *            mapping is requested from a hook other than
+ *            FORWARD/LOCAL_OUT/POST_ROUTING,
+ *   -ERANGE  a set dimension exceeds IPSET_DIM_MAX.
+ *
+ * On every failure path the references taken so far are released again
+ * through the cleanup labels, in reverse order of acquisition.
+ */
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct xt_set_info_target_v3 *info = par->targinfo;
+	ip_set_id_t index;
+	int ret = 0;
+
+	if (info->add_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(par->net,
+						info->add_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warn("Cannot find add_set index %u as target\n",
+				info->add_set.index);
+			return -ENOENT;
+		}
+	}
+
+	if (info->del_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(par->net,
+						info->del_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warn("Cannot find del_set index %u as target\n",
+				info->del_set.index);
+			ret = -ENOENT;
+			goto cleanup_add;
+		}
+	}
+
+	if (info->map_set.index != IPSET_INVALID_ID) {
+		/* add_set/del_set may already be held: unwind, don't return */
+		if (strncmp(par->table, "mangle", 7)) {
+			pr_warn("--map-set only usable from mangle table\n");
+			ret = -EINVAL;
+			goto cleanup_del;
+		}
+		/*
+		 * Reject when any hook outside the permitted ones is set,
+		 * rather than when none of the permitted ones is set: an
+		 * empty mask is acceptable, a mixed mask is not.
+		 */
+		if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+		     (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+		    (par->hook_mask & ~(1 << NF_INET_FORWARD |
+					1 << NF_INET_LOCAL_OUT |
+					1 << NF_INET_POST_ROUTING))) {
+			pr_warn("mapping of prio or/and queue is allowed only "
+				"from OUTPUT/FORWARD/POSTROUTING chains\n");
+			ret = -EINVAL;
+			goto cleanup_del;
+		}
+		index = ip_set_nfnl_get_byindex(par->net,
+						info->map_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warn("Cannot find map_set index %u as target\n",
+				info->map_set.index);
+			ret = -ENOENT;
+			goto cleanup_del;
+		}
+	}
+
+	if (info->add_set.dim > IPSET_DIM_MAX ||
+	    info->del_set.dim > IPSET_DIM_MAX ||
+	    info->map_set.dim > IPSET_DIM_MAX) {
+		pr_warn("Protocol error: SET target dimension "
+			"is over the limit!\n");
+		ret = -ERANGE;
+		goto cleanup_map;
+	}
+
+	return 0;
+
+cleanup_map:
+	if (info->map_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->map_set.index);
+cleanup_del:
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->del_set.index);
+cleanup_add:
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->add_set.index);
+	return ret;
+}
+
+/*
+ * Release the set references held by a revision 3 SET target: one
+ * ip_set_nfnl_put() for each of add_set, del_set and map_set whose index
+ * is not IPSET_INVALID_ID.
+ */
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct xt_set_info_target_v3 *tginfo = par->targinfo;
+
+	if (tginfo->add_set.index != IPSET_INVALID_ID) {
+		ip_set_nfnl_put(par->net, tginfo->add_set.index);
+	}
+
+	if (tginfo->del_set.index != IPSET_INVALID_ID) {
+		ip_set_nfnl_put(par->net, tginfo->del_set.index);
+	}
+
+	if (tginfo->map_set.index != IPSET_INVALID_ID) {
+		ip_set_nfnl_put(par->net, tginfo->map_set.index);
+	}
+}
+
+
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
@@ -493,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = {
.destroy = set_target_v2_destroy,
.me = THIS_MODULE
},
+ /* --map-set support */
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
};
static int __init xt_set_init(void)