aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-01-05 20:18:50 -0500
committerDavid S. Miller <davem@davemloft.net>2014-01-05 20:18:50 -0500
commit855404efae0d449cc491978d54ea5d117a3cb271 (patch)
tree3c44948365a77058d8b1f2ed6e6683bfc52ef256
parentMerge branch 'bgmac' (diff)
parentnetfilter: x_tables: lightweight process control group matching (diff)
downloadlinux-dev-855404efae0d449cc491978d54ea5d117a3cb271.tar.xz
linux-dev-855404efae0d449cc491978d54ea5d117a3cb271.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== netfilter/IPVS updates for net-next The following patchset contains Netfilter updates for your net-next tree, they are: * Add full port randomization support. Some crazy researchers found a way to reconstruct the secure ephemeral ports that are allocated in random mode by sending off-path bursts of UDP packets to overrun the socket buffer of the DNS resolver to trigger retransmissions, then if the timing for the DNS resolution done by a client is larger than usual, then they conclude that the port that received the burst of UDP packets is the one that was opened. It seems a bit aggressive method to me but it seems to work for them. As a result, Daniel Borkmann and Hannes Frederic Sowa came up with a new NAT mode to fully randomize ports using prandom. * Add a new classifier to x_tables based on the socket net_cls set via cgroups. These includes two patches to prepare the field as requested by Zefan Li. Also from Daniel Borkmann. * Use prandom instead of get_random_bytes in several locations of the netfilter code, from Florian Westphal. * Allow to use the CTA_MARK_MASK in ctnetlink when mangling the conntrack mark, also from Florian Westphal. * Fix compilation warning due to unused variable in IPVS, from Geert Uytterhoeven. * Add support for UID/GID via nfnetlink_queue, from Valentina Giusti. * Add IPComp extension to x_tables, from Fan Du. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/cgroups/net_cls.txt5
-rw-r--r--include/linux/cgroup_subsys.h4
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/netfilter/ipset/ip_set.h1
-rw-r--r--include/net/cls_cgroup.h40
-rw-r--r--include/net/netfilter/ipv4/nf_conntrack_ipv4.h2
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h1
-rw-r--r--include/net/netns/conntrack.h33
-rw-r--r--include/net/netprio_cgroup.h18
-rw-r--r--include/net/sock.h2
-rw-r--r--include/uapi/linux/netfilter/Kbuild2
-rw-r--r--include/uapi/linux/netfilter/nf_nat.h12
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_queue.h5
-rw-r--r--include/uapi/linux/netfilter/xt_cgroup.h11
-rw-r--r--include/uapi/linux/netfilter/xt_ipcomp.h16
-rw-r--r--net/Kconfig11
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/netclassid_cgroup.c120
-rw-r--r--net/core/sock.c14
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/netfilter/Kconfig19
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/ipset/ip_set_core.c28
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c5
-rw-r--r--net/netfilter/nf_conntrack_core.c15
-rw-r--r--net/netfilter/nf_conntrack_netlink.c12
-rw-r--r--net/netfilter/nf_conntrack_proto.c6
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_proto_common.c10
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nfnetlink_queue_core.c34
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/xt_CT.c4
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netfilter/xt_cgroup.c71
-rw-r--r--net/netfilter/xt_connlimit.c2
-rw-r--r--net/netfilter/xt_hashlimit.c2
-rw-r--r--net/netfilter/xt_ipcomp.c111
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/sched/Kconfig1
-rw-r--r--net/sched/cls_cgroup.c111
42 files changed, 487 insertions, 274 deletions
diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroups/net_cls.txt
index 9face6bb578a..ec182346dea2 100644
--- a/Documentation/cgroups/net_cls.txt
+++ b/Documentation/cgroups/net_cls.txt
@@ -6,6 +6,8 @@ tag network packets with a class identifier (classid).
The Traffic Controller (tc) can be used to assign
different priorities to packets from different cgroups.
+Also, Netfilter (iptables) can use this tag to perform
+actions on such packets.
Creating a net_cls cgroups instance creates a net_cls.classid file.
This net_cls.classid value is initialized to 0.
@@ -32,3 +34,6 @@ tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
- creating traffic class 10:1
tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example:
+iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index b613ffd402d1..7b99d717411d 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -31,7 +31,7 @@ SUBSYS(devices)
SUBSYS(freezer)
#endif
-#if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
SUBSYS(net_cls)
#endif
@@ -43,7 +43,7 @@ SUBSYS(blkio)
SUBSYS(perf)
#endif
-#if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_PRIO)
SUBSYS(net_prio)
#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 51c0fe258163..0c30af38be0d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1444,7 +1444,7 @@ struct net_device {
/* max exchange id for FCoE LRO by ddp */
unsigned int fcoe_ddp_xid;
#endif
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map __rcu *priomap;
#endif
/* phy device may attach itself for hardware timestamping */
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index c7174b816674..0c7d01eae56c 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -331,7 +331,6 @@ extern ip_set_id_t ip_set_get_byname(struct net *net,
const char *name, struct ip_set **set);
extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
-extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name);
extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 33d03b648646..9cf2d5ef38d9 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -16,17 +16,16 @@
#include <linux/cgroup.h>
#include <linux/hardirq.h>
#include <linux/rcupdate.h>
+#include <net/sock.h>
-#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-struct cgroup_cls_state
-{
+#ifdef CONFIG_CGROUP_NET_CLASSID
+struct cgroup_cls_state {
struct cgroup_subsys_state css;
u32 classid;
};
-void sock_update_classid(struct sock *sk);
+struct cgroup_cls_state *task_cls_state(struct task_struct *p);
-#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
static inline u32 task_cls_classid(struct task_struct *p)
{
u32 classid;
@@ -41,33 +40,18 @@ static inline u32 task_cls_classid(struct task_struct *p)
return classid;
}
-#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
-static inline u32 task_cls_classid(struct task_struct *p)
-{
- struct cgroup_subsys_state *css;
- u32 classid = 0;
-
- if (in_interrupt())
- return 0;
-
- rcu_read_lock();
- css = task_css(p, net_cls_subsys_id);
- if (css)
- classid = container_of(css,
- struct cgroup_cls_state, css)->classid;
- rcu_read_unlock();
- return classid;
-}
-#endif
-#else /* !CGROUP_NET_CLS_CGROUP */
static inline void sock_update_classid(struct sock *sk)
{
-}
+ u32 classid;
-static inline u32 task_cls_classid(struct task_struct *p)
+ classid = task_cls_classid(current);
+ if (classid != sk->sk_classid)
+ sk->sk_classid = classid;
+}
+#else /* !CONFIG_CGROUP_NET_CLASSID */
+static inline void sock_update_classid(struct sock *sk)
{
- return 0;
}
-#endif /* CGROUP_NET_CLS_CGROUP */
+#endif /* CONFIG_CGROUP_NET_CLASSID */
#endif /* _NET_CLS_CGROUP_H */
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 6c3d12e2949f..981c327374da 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -19,6 +19,4 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
int nf_conntrack_ipv4_compat_init(void);
void nf_conntrack_ipv4_compat_fini(void);
-void need_ipv4_conntrack(void);
-
#endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 3efab704b7eb..adc1fa3dd7ab 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -87,7 +87,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
-void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index c9c0c538b68b..fbcc7fa536dc 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -65,6 +65,23 @@ struct nf_ip_net {
struct netns_ct {
atomic_t count;
unsigned int expect_count;
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+ struct ctl_table_header *acct_sysctl_header;
+ struct ctl_table_header *tstamp_sysctl_header;
+ struct ctl_table_header *event_sysctl_header;
+ struct ctl_table_header *helper_sysctl_header;
+#endif
+ char *slabname;
+ unsigned int sysctl_log_invalid; /* Log invalid packets */
+ unsigned int sysctl_events_retry_timeout;
+ int sysctl_events;
+ int sysctl_acct;
+ int sysctl_auto_assign_helper;
+ bool auto_assign_helper_warned;
+ int sysctl_tstamp;
+ int sysctl_checksum;
+
unsigned int htable_size;
struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash;
@@ -75,14 +92,6 @@ struct netns_ct {
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
- int sysctl_events;
- unsigned int sysctl_events_retry_timeout;
- int sysctl_acct;
- int sysctl_tstamp;
- int sysctl_checksum;
- unsigned int sysctl_log_invalid; /* Log invalid packets */
- int sysctl_auto_assign_helper;
- bool auto_assign_helper_warned;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
@@ -92,13 +101,5 @@ struct netns_ct {
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
#endif
-#ifdef CONFIG_SYSCTL
- struct ctl_table_header *sysctl_header;
- struct ctl_table_header *acct_sysctl_header;
- struct ctl_table_header *tstamp_sysctl_header;
- struct ctl_table_header *event_sysctl_header;
- struct ctl_table_header *helper_sysctl_header;
-#endif
- char *slabname;
};
#endif
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index 099d02782e22..dafc09f0fdbc 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -13,12 +13,12 @@
#ifndef _NETPRIO_CGROUP_H
#define _NETPRIO_CGROUP_H
+
#include <linux/cgroup.h>
#include <linux/hardirq.h>
#include <linux/rcupdate.h>
-
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map {
struct rcu_head rcu;
u32 priomap_len;
@@ -27,8 +27,7 @@ struct netprio_map {
void sock_update_netprioidx(struct sock *sk);
-#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
-
+#if IS_BUILTIN(CONFIG_CGROUP_NET_PRIO)
static inline u32 task_netprioidx(struct task_struct *p)
{
struct cgroup_subsys_state *css;
@@ -40,9 +39,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
rcu_read_unlock();
return idx;
}
-
-#elif IS_MODULE(CONFIG_NETPRIO_CGROUP)
-
+#elif IS_MODULE(CONFIG_CGROUP_NET_PRIO)
static inline u32 task_netprioidx(struct task_struct *p)
{
struct cgroup_subsys_state *css;
@@ -56,9 +53,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
return idx;
}
#endif
-
-#else /* !CONFIG_NETPRIO_CGROUP */
-
+#else /* !CONFIG_CGROUP_NET_PRIO */
static inline u32 task_netprioidx(struct task_struct *p)
{
return 0;
@@ -66,6 +61,5 @@ static inline u32 task_netprioidx(struct task_struct *p)
#define sock_update_netprioidx(sk)
-#endif /* CONFIG_NETPRIO_CGROUP */
-
+#endif /* CONFIG_CGROUP_NET_PRIO */
#endif /* _NET_CLS_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 8d9af66ccf2c..5c3f7c3624aa 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -395,7 +395,7 @@ struct sock {
unsigned short sk_ack_backlog;
unsigned short sk_max_ack_backlog;
__u32 sk_priority;
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
__u32 sk_cgrp_prioidx;
#endif
struct pid *sk_peer_pid;
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 17c3af2c4bb9..2344f5a319fc 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -39,6 +39,7 @@ header-y += xt_TEE.h
header-y += xt_TPROXY.h
header-y += xt_addrtype.h
header-y += xt_bpf.h
+header-y += xt_cgroup.h
header-y += xt_cluster.h
header-y += xt_comment.h
header-y += xt_connbytes.h
@@ -54,6 +55,7 @@ header-y += xt_ecn.h
header-y += xt_esp.h
header-y += xt_hashlimit.h
header-y += xt_helper.h
+header-y += xt_ipcomp.h
header-y += xt_iprange.h
header-y += xt_ipvs.h
header-y += xt_length.h
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index bf0cc373ffb6..1ad3659102b6 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -4,10 +4,14 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
-#define NF_NAT_RANGE_MAP_IPS 1
-#define NF_NAT_RANGE_PROTO_SPECIFIED 2
-#define NF_NAT_RANGE_PROTO_RANDOM 4
-#define NF_NAT_RANGE_PERSISTENT 8
+#define NF_NAT_RANGE_MAP_IPS (1 << 0)
+#define NF_NAT_RANGE_PROTO_SPECIFIED (1 << 1)
+#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2)
+#define NF_NAT_RANGE_PERSISTENT (1 << 3)
+#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
+
+#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
+ (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
struct nf_nat_ipv4_range {
unsigned int flags;
diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index 0132bad79de7..8dd819e2b5fe 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -47,6 +47,8 @@ enum nfqnl_attr_type {
NFQA_CAP_LEN, /* __u32 length of captured packet */
NFQA_SKB_INFO, /* __u32 skb meta information */
NFQA_EXP, /* nf_conntrack_netlink.h */
+ NFQA_UID, /* __u32 sk uid */
+ NFQA_GID, /* __u32 sk gid */
__NFQA_MAX
};
@@ -99,7 +101,8 @@ enum nfqnl_attr_config {
#define NFQA_CFG_F_FAIL_OPEN (1 << 0)
#define NFQA_CFG_F_CONNTRACK (1 << 1)
#define NFQA_CFG_F_GSO (1 << 2)
-#define NFQA_CFG_F_MAX (1 << 3)
+#define NFQA_CFG_F_UID_GID (1 << 3)
+#define NFQA_CFG_F_MAX (1 << 4)
/* flags for NFQA_SKB_INFO */
/* packet appears to have wrong checksums, but they are ok */
diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h
new file mode 100644
index 000000000000..43acb7e175f6
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_cgroup.h
@@ -0,0 +1,11 @@
+#ifndef _UAPI_XT_CGROUP_H
+#define _UAPI_XT_CGROUP_H
+
+#include <linux/types.h>
+
+struct xt_cgroup_info {
+ __u32 id;
+ __u32 invert;
+};
+
+#endif /* _UAPI_XT_CGROUP_H */
diff --git a/include/uapi/linux/netfilter/xt_ipcomp.h b/include/uapi/linux/netfilter/xt_ipcomp.h
new file mode 100644
index 000000000000..45c7e40eb8e1
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_ipcomp.h
@@ -0,0 +1,16 @@
+#ifndef _XT_IPCOMP_H
+#define _XT_IPCOMP_H
+
+#include <linux/types.h>
+
+struct xt_ipcomp {
+ __u32 spis[2]; /* Security Parameter Index */
+ __u8 invflags; /* Inverse flags */
+ __u8 hdrres; /* Test of the Reserved Filed */
+};
+
+/* Values for "invflags" field in struct xt_ipcomp. */
+#define XT_IPCOMP_INV_SPI 0x01 /* Invert the sense of spi. */
+#define XT_IPCOMP_INV_MASK 0x01 /* All possible flags. */
+
+#endif /*_XT_IPCOMP_H*/
diff --git a/net/Kconfig b/net/Kconfig
index d334678c0bd8..e411046a62e3 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -238,12 +238,19 @@ config XPS
depends on SMP
default y
-config NETPRIO_CGROUP
+config CGROUP_NET_PRIO
tristate "Network priority cgroup"
depends on CGROUPS
---help---
Cgroup subsystem for use in assigning processes to network priorities on
- a per-interface basis
+ a per-interface basis.
+
+config CGROUP_NET_CLASSID
+ boolean "Network classid cgroup"
+ depends on CGROUPS
+ ---help---
+ Cgroup subsystem for use as general purpose socket classid marker that is
+ being used in cls_cgroup and for netfilter matching.
config NET_RX_BUSY_POLL
boolean
diff --git a/net/core/Makefile b/net/core/Makefile
index b33b996f5dd6..9628c20acff6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
-obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
+obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 77f43aa373fe..153ee2f8c33e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2741,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return rc;
}
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
static void skb_update_prio(struct sk_buff *skb)
{
struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
new file mode 100644
index 000000000000..719efd541668
--- /dev/null
+++ b/net/core/netclassid_cgroup.c
@@ -0,0 +1,120 @@
+/*
+ * net/core/netclassid_cgroup.c Classid Cgroupfs Handling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/cgroup.h>
+#include <linux/fdtable.h>
+#include <net/cls_cgroup.h>
+#include <net/sock.h>
+
+static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
+{
+ return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
+}
+
+struct cgroup_cls_state *task_cls_state(struct task_struct *p)
+{
+ return css_cls_state(task_css(p, net_cls_subsys_id));
+}
+EXPORT_SYMBOL_GPL(task_cls_state);
+
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+ struct cgroup_cls_state *cs;
+
+ cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+ if (!cs)
+ return ERR_PTR(-ENOMEM);
+
+ return &cs->css;
+}
+
+static int cgrp_css_online(struct cgroup_subsys_state *css)
+{
+ struct cgroup_cls_state *cs = css_cls_state(css);
+ struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+
+ if (parent)
+ cs->classid = parent->classid;
+
+ return 0;
+}
+
+static void cgrp_css_free(struct cgroup_subsys_state *css)
+{
+ kfree(css_cls_state(css));
+}
+
+static int update_classid(const void *v, struct file *file, unsigned n)
+{
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+
+ if (sock)
+ sock->sk->sk_classid = (u32)(unsigned long)v;
+
+ return 0;
+}
+
+static void cgrp_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
+{
+ struct cgroup_cls_state *cs = css_cls_state(css);
+ void *v = (void *)(unsigned long)cs->classid;
+ struct task_struct *p;
+
+ cgroup_taskset_for_each(p, css, tset) {
+ task_lock(p);
+ iterate_fd(p->files, 0, update_classid, v);
+ task_unlock(p);
+ }
+}
+
+static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return css_cls_state(css)->classid;
+}
+
+static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 value)
+{
+ css_cls_state(css)->classid = (u32) value;
+
+ return 0;
+}
+
+static struct cftype ss_files[] = {
+ {
+ .name = "classid",
+ .read_u64 = read_classid,
+ .write_u64 = write_classid,
+ },
+ { } /* terminate */
+};
+
+struct cgroup_subsys net_cls_subsys = {
+ .name = "net_cls",
+ .css_alloc = cgrp_css_alloc,
+ .css_online = cgrp_css_online,
+ .css_free = cgrp_css_free,
+ .attach = cgrp_attach,
+ .subsys_id = net_cls_subsys_id,
+ .base_cftypes = ss_files,
+ .module = THIS_MODULE,
+};
+
+static int __init init_netclassid_cgroup(void)
+{
+ return cgroup_load_subsys(&net_cls_subsys);
+}
+__initcall(init_netclassid_cgroup);
diff --git a/net/core/sock.c b/net/core/sock.c
index 0358855576a8..85ad6f0d3898 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1307,19 +1307,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
-#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk)
-{
- u32 classid;
-
- classid = task_cls_classid(current);
- if (classid != sk->sk_classid)
- sk->sk_classid = classid;
-}
-EXPORT_SYMBOL(sock_update_classid);
-#endif
-
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
void sock_update_netprioidx(struct sock *sk)
{
if (in_interrupt())
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ecd8bec411c9..8127dc802865 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-void need_ipv4_conntrack(void)
-{
- return;
-}
-EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c3398cd99b94..c17902cb5df9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -858,6 +858,16 @@ config NETFILTER_XT_MATCH_BPF
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_CGROUP
+ tristate '"control group" match support'
+ depends on NETFILTER_ADVANCED
+ depends on CGROUPS
+ select CGROUP_NET_CLASSID
+ ---help---
+ Socket/process control group matching allows you to match locally
+ generated packets based on which net_cls control group processes
+ belong to.
+
config NETFILTER_XT_MATCH_CLUSTER
tristate '"cluster" match support'
depends on NF_CONNTRACK
@@ -1035,6 +1045,15 @@ config NETFILTER_XT_MATCH_HL
in the IPv6 header, or the time-to-live field in the IPv4
header of the packet.
+config NETFILTER_XT_MATCH_IPCOMP
+ tristate '"ipcomp" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match extension allows you to match a range of CPIs(16 bits)
+ inside IPComp header of IPSec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_MATCH_IPRANGE
tristate '"iprange" address range match support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 394483b2c193..407fc232f625 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -133,6 +133,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
@@ -142,6 +143,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bac7e01df67f..de770ec39e51 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -625,34 +625,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
*/
/*
- * Find set by name, reference it once. The reference makes sure the
- * thing pointed to, does not go away under our feet.
- *
- * The nfnl mutex is used in the function.
- */
-ip_set_id_t
-ip_set_nfnl_get(struct net *net, const char *name)
-{
- ip_set_id_t i, index = IPSET_INVALID_ID;
- struct ip_set *s;
- struct ip_set_net *inst = ip_set_pernet(net);
-
- nfnl_lock(NFNL_SUBSYS_IPSET);
- for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
- if (s != NULL && STREQ(s->name, name)) {
- __ip_set_get(s);
- index = i;
- break;
- }
- }
- nfnl_unlock(NFNL_SUBSYS_IPSET);
-
- return index;
-}
-EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
-
-/*
* Find set by index, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
*
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f63c2388f38d..db801263ee9f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data)
continue;
}
while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
- int ret = __wait_event_interruptible(*sk_sleep(sk),
+ /* (Ab)use interruptible sleep to avoid increasing
+ * the load avg.
+ */
+ __wait_event_interruptible(*sk_sleep(sk),
sock_writeable(sk) ||
kthread_should_stop());
if (unlikely(kthread_should_stop()))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 43549eb7a7be..8824ed0ccc9c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
-int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff);
-EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
-
DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
@@ -361,15 +355,6 @@ begin:
return NULL;
}
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
-{
- return ____nf_conntrack_find(net, zone, tuple,
- hash_conntrack_raw(tuple, zone));
-}
-EXPORT_SYMBOL_GPL(__nf_conntrack_find);
-
/* Find a connection corresponding to a tuple. */
static struct nf_conntrack_tuple_hash *
__nf_conntrack_find_get(struct net *net, u16 zone,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 08870b859046..bb322d0beb48 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
return err;
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK])
- ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ if (cda[CTA_MARK]) {
+ u32 mask = 0, mark, newmark;
+ if (cda[CTA_MARK_MASK])
+ mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+ mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ newmark = (ct->mark & mask) ^ mark;
+ if (newmark != ct->mark)
+ ct->mark = newmark;
+ }
#endif
return 0;
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ce3004156eeb..b65d5864b6d9 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto)
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
-void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
-{
- module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_put);
-
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 63a815402211..d3f5cd6dd962 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
* manips not an issue.
*/
if (maniptype == NF_NAT_MANIP_SRC &&
- !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+ !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(l3proto, l4proto, orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
*/
/* Only bother mapping if it's not already in range and unique */
- if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (l4proto->in_range(tuple, maniptype,
&range->min_proto,
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 9baaf734c142..83a72a235cae 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
range_size = ntohs(range->max_proto.all) - min + 1;
}
- if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
+ if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
? tuple->dst.u.all
: tuple->src.u.all);
- else
+ } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
+ off = prandom_u32();
+ } else {
off = *rover;
+ }
for (i = 0; ; ++off) {
*portptr = htons(min + off % range_size);
if (++i != range_size && nf_nat_used_tuple(tuple, ct))
continue;
- if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
*rover = off;
return;
}
- return;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 3c4b69e5fe17..7d4254b0dc6b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -28,8 +28,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
@@ -75,7 +73,6 @@ struct nfulnl_instance {
};
#define INSTANCE_BUCKETS 16
-static unsigned int hash_init;
static int nfnl_log_net_id __read_mostly;
@@ -1066,11 +1063,6 @@ static int __init nfnetlink_log_init(void)
{
int status = -ENOMEM;
- /* it's not really all that important to have a random value, so
- * we can do this from the init function, even if there hasn't
- * been that much entropy yet */
- get_random_bytes(&hash_init, sizeof(hash_init));
-
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 21258cf70091..d3cf12b83174 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -297,6 +297,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
}
+static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
+{
+ const struct cred *cred;
+
+ if (sk->sk_state == TCP_TIME_WAIT)
+ return 0;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ cred = sk->sk_socket->file->f_cred;
+ if (nla_put_be32(skb, NFQA_UID,
+ htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFQA_GID,
+ htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
+ goto nla_put_failure;
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+ return 0;
+
+nla_put_failure:
+ read_unlock_bh(&sk->sk_callback_lock);
+ return -1;
+}
+
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
@@ -372,6 +397,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (queue->flags & NFQA_CFG_F_CONNTRACK)
ct = nfqnl_ct_get(entskb, &size, &ctinfo);
+ if (queue->flags & NFQA_CFG_F_UID_GID) {
+ size += (nla_total_size(sizeof(u_int32_t)) /* uid */
+ + nla_total_size(sizeof(u_int32_t))); /* gid */
+ }
+
skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
GFP_ATOMIC);
if (!skb)
@@ -484,6 +514,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
goto nla_put_failure;
}
+ if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
+ nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
+ goto nla_put_failure;
+
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3d3f8fce10a5..6aae699aeb46 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -164,7 +164,7 @@ static int nft_hash_init(const struct nft_set *set,
unsigned int cnt, i;
if (unlikely(!nft_hash_rnd_initted)) {
- get_random_bytes(&nft_hash_rnd, 4);
+ nft_hash_rnd = prandom_u32();
nft_hash_rnd_initted = true;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index da35ac06a975..5929be622c5c 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
ret = 0;
if ((info->ct_events || info->exp_events) &&
!nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
- GFP_KERNEL))
+ GFP_KERNEL)) {
+ ret = -EINVAL;
goto err3;
+ }
if (info->helper[0]) {
ret = xt_ct_set_helper(ct, info->helper, par);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 370adf622cef..190854be7629 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -100,7 +100,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
int ret;
if (unlikely(!rnd_inited)) {
- get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+ jhash_rnd = prandom_u32();
rnd_inited = true;
}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
new file mode 100644
index 000000000000..9a8e77e7f8d4
--- /dev/null
+++ b/net/netfilter/xt_cgroup.c
@@ -0,0 +1,71 @@
+/*
+ * Xtables module to match the process control group.
+ *
+ * Might be used to implement individual "per-application" firewall
+ * policies in contrast to global policies based on control groups.
+ * Matching is based upon processes tagged to net_cls' classid marker.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_cgroup.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("Xtables: process control group matching");
+MODULE_ALIAS("ipt_cgroup");
+MODULE_ALIAS("ip6t_cgroup");
+
+static int cgroup_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info *info = par->matchinfo;
+
+ if (info->invert & ~1)
+ return -EINVAL;
+
+ return info->id ? 0 : -EINVAL;
+}
+
+static bool
+cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info *info = par->matchinfo;
+
+ if (skb->sk == NULL)
+ return false;
+
+ return (info->id == skb->sk->sk_classid) ^ info->invert;
+}
+
+static struct xt_match cgroup_mt_reg __read_mostly = {
+ .name = "cgroup",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check,
+ .match = cgroup_mt,
+ .matchsize = sizeof(struct xt_cgroup_info),
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init cgroup_mt_init(void)
+{
+ return xt_register_match(&cgroup_mt_reg);
+}
+
+static void __exit cgroup_mt_exit(void)
+{
+ xt_unregister_match(&cgroup_mt_reg);
+}
+
+module_init(cgroup_mt_init);
+module_exit(cgroup_mt_exit);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index c40b2695633b..7671e8214919 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -229,7 +229,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
u_int32_t rand;
do {
- get_random_bytes(&rand, sizeof(rand));
+ rand = prandom_u32();
} while (!rand);
cmpxchg(&connlimit_rnd, 0, rand);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a3910fc2122b..d819f62b3b7c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -177,7 +177,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
/* initialize hash with random val at the time we allocate
* the first hashtable entry */
if (unlikely(!ht->rnd_initialized)) {
- get_random_bytes(&ht->rnd, sizeof(ht->rnd));
+ ht->rnd = prandom_u32();
ht->rnd_initialized = true;
}
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
new file mode 100644
index 000000000000..a4c7561698c5
--- /dev/null
+++ b/net/netfilter/xt_ipcomp.c
@@ -0,0 +1,111 @@
+/* Kernel module to match IPComp parameters for IPv4 and IPv6
+ *
+ * Copyright (C) 2013 WindRiver
+ *
+ * Author:
+ * Fan Du <fan.du@windriver.com>
+ *
+ * Based on:
+ * net/netfilter/xt_esp.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter/xt_ipcomp.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fan Du <fan.du@windriver.com>");
+MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+ bool r;
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
+ r = (spi >= min && spi <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ip_comp_hdr _comphdr;
+ const struct ip_comp_hdr *chdr;
+ const struct xt_ipcomp *compinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr);
+ if (chdr == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ pr_debug("Dropping evil IPComp tinygram.\n");
+ par->hotdrop = true;
+ return 0;
+ }
+
+ return spi_match(compinfo->spis[0], compinfo->spis[1],
+ ntohl(chdr->cpi << 16),
+ !!(compinfo->invflags & XT_IPCOMP_INV_SPI));
+}
+
+static int comp_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_ipcomp *compinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) {
+ pr_err("unknown flags %X\n", compinfo->invflags);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_match comp_mt_reg[] __read_mostly = {
+ {
+ .name = "ipcomp",
+ .family = NFPROTO_IPV4,
+ .match = comp_mt,
+ .matchsize = sizeof(struct xt_ipcomp),
+ .proto = IPPROTO_COMP,
+ .checkentry = comp_mt_check,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "ipcomp",
+ .family = NFPROTO_IPV6,
+ .match = comp_mt,
+ .matchsize = sizeof(struct xt_ipcomp),
+ .proto = IPPROTO_COMP,
+ .checkentry = comp_mt_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init comp_mt_init(void)
+{
+ return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
+}
+
+static void __exit comp_mt_exit(void)
+{
+ xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
+}
+
+module_init(comp_mt_init);
+module_exit(comp_mt_exit);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1e657cf715c4..bfdc29f1a04a 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -334,7 +334,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
size_t sz;
if (unlikely(!hash_rnd_inited)) {
- get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+ hash_rnd = prandom_u32();
hash_rnd_inited = true;
}
if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 919847beec39..d3d7a0a66e28 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -444,6 +444,7 @@ config NET_CLS_FLOW
config NET_CLS_CGROUP
tristate "Control Group Classifier"
select NET_CLS
+ select CGROUP_NET_CLASSID
depends on CGROUPS
---help---
Say Y here if you want to classify packets based on the control
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index f9d212583ea2..8349fcdc50f3 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -11,109 +11,13 @@
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
#include <linux/skbuff.h>
-#include <linux/cgroup.h>
#include <linux/rcupdate.h>
-#include <linux/fdtable.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>
-static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
-{
- return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
-}
-
-static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
-{
- return css_cls_state(task_css(p, net_cls_subsys_id));
-}
-
-static struct cgroup_subsys_state *
-cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
-{
- struct cgroup_cls_state *cs;
-
- cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs)
- return ERR_PTR(-ENOMEM);
- return &cs->css;
-}
-
-static int cgrp_css_online(struct cgroup_subsys_state *css)
-{
- struct cgroup_cls_state *cs = css_cls_state(css);
- struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
-
- if (parent)
- cs->classid = parent->classid;
- return 0;
-}
-
-static void cgrp_css_free(struct cgroup_subsys_state *css)
-{
- kfree(css_cls_state(css));
-}
-
-static int update_classid(const void *v, struct file *file, unsigned n)
-{
- int err;
- struct socket *sock = sock_from_file(file, &err);
- if (sock)
- sock->sk->sk_classid = (u32)(unsigned long)v;
- return 0;
-}
-
-static void cgrp_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
-{
- struct task_struct *p;
- struct cgroup_cls_state *cs = css_cls_state(css);
- void *v = (void *)(unsigned long)cs->classid;
-
- cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid, v);
- task_unlock(p);
- }
-}
-
-static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
-{
- return css_cls_state(css)->classid;
-}
-
-static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 value)
-{
- css_cls_state(css)->classid = (u32) value;
- return 0;
-}
-
-static struct cftype ss_files[] = {
- {
- .name = "classid",
- .read_u64 = read_classid,
- .write_u64 = write_classid,
- },
- { } /* terminate */
-};
-
-struct cgroup_subsys net_cls_subsys = {
- .name = "net_cls",
- .css_alloc = cgrp_css_alloc,
- .css_online = cgrp_css_online,
- .css_free = cgrp_css_free,
- .attach = cgrp_attach,
- .subsys_id = net_cls_subsys_id,
- .base_cftypes = ss_files,
- .module = THIS_MODULE,
-};
-
struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
@@ -305,25 +209,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
static int __init init_cgroup_cls(void)
{
- int ret;
-
- ret = cgroup_load_subsys(&net_cls_subsys);
- if (ret)
- goto out;
-
- ret = register_tcf_proto_ops(&cls_cgroup_ops);
- if (ret)
- cgroup_unload_subsys(&net_cls_subsys);
-
-out:
- return ret;
+ return register_tcf_proto_ops(&cls_cgroup_ops);
}
static void __exit exit_cgroup_cls(void)
{
unregister_tcf_proto_ops(&cls_cgroup_ops);
-
- cgroup_unload_subsys(&net_cls_subsys);
}
module_init(init_cgroup_cls);