From f703651ef870bd6b94ddc98ae07488b7d3fd9335 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:20:14 +0100 Subject: netfilter: NFNL_SUBSYS_IPSET id and NLA_PUT_NET* macros The patch adds the NFNL_SUBSYS_IPSET id and NLA_PUT_NET* macros to the vanilla kernel. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 3 ++- include/net/netlink.h | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 361d6b5630ee..2b11fc1a86be 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -47,7 +47,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_QUEUE 3 #define NFNL_SUBSYS_ULOG 4 #define NFNL_SUBSYS_OSF 5 -#define NFNL_SUBSYS_COUNT 6 +#define NFNL_SUBSYS_IPSET 6 +#define NFNL_SUBSYS_COUNT 7 #ifdef __KERNEL__ diff --git a/include/net/netlink.h b/include/net/netlink.h index 373f1a900cf4..8a3906a08f5f 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -856,18 +856,27 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, #define NLA_PUT_BE16(skb, attrtype, value) \ NLA_PUT_TYPE(skb, __be16, attrtype, value) +#define NLA_PUT_NET16(skb, attrtype, value) \ + NLA_PUT_BE16(skb, attrtype | NLA_F_NET_BYTEORDER, value) + #define NLA_PUT_U32(skb, attrtype, value) \ NLA_PUT_TYPE(skb, u32, attrtype, value) #define NLA_PUT_BE32(skb, attrtype, value) \ NLA_PUT_TYPE(skb, __be32, attrtype, value) +#define NLA_PUT_NET32(skb, attrtype, value) \ + NLA_PUT_BE32(skb, attrtype | NLA_F_NET_BYTEORDER, value) + #define NLA_PUT_U64(skb, attrtype, value) \ NLA_PUT_TYPE(skb, u64, attrtype, value) #define NLA_PUT_BE64(skb, attrtype, value) \ NLA_PUT_TYPE(skb, __be64, attrtype, value) +#define NLA_PUT_NET64(skb, attrtype, value) \ + NLA_PUT_BE64(skb, attrtype | NLA_F_NET_BYTEORDER, value) + #define NLA_PUT_STRING(skb, attrtype, value) \ NLA_PUT(skb, attrtype, strlen(value) + 1, value) -- cgit v1.2.3-59-g8ed1b From a7b4f989a629493bb4ec4a354def784d440b32c4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:28:35 +0100 Subject: netfilter: ipset: IP set core support The patch adds the IP set core support to the kernel. The IP set core implements a netlink (nfnetlink) based protocol by which one can create, destroy, flush, rename, swap, list, save, restore sets, and add, delete, test elements from userspace. For simplicity (and backward compatibilty and for not to force ip(6)tables to be linked with a netlink library) reasons a small getsockopt-based protocol is also kept in order to communicate with the ip(6)tables match and target. The netlink protocol passes all u16, etc values in network order with NLA_F_NET_BYTEORDER flag. The protocol enforces the proper use of the NLA_F_NESTED and NLA_F_NET_BYTEORDER flags. For other kernel subsystems (netfilter match and target) the API contains the functions to add, delete and test elements in sets and the required calls to get/put refereces to the sets before those operations can be performed. The set types (which are implemented in independent modules) are stored in a simple RCU protected list. A set type may have variants: for example without timeout or with timeout support, for IPv4 or for IPv6. The sets (i.e. the pointers to the sets) are stored in an array. The sets are identified by their index in the array, which makes possible easy and fast swapping of sets. The array is protected indirectly by the nfnl mutex from nfnetlink. The content of the sets are protected by the rwlock of the set. There are functional differences between the add/del/test functions for the kernel and userspace: - kernel add/del/test: works on the current packet (i.e. one element) - kernel test: may trigger an "add" operation in order to fill out unspecified parts of the element from the packet (like MAC address) - userspace add/del: works on the netlink message and thus possibly on multiple elements from the IPSET_ATTR_ADT container attribute. - userspace add: may trigger resizing of a set Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set.h | 452 +++++++ include/linux/netfilter/ipset/ip_set_getport.h | 11 + include/linux/netfilter/ipset/pfxlen.h | 35 + net/netfilter/Kconfig | 2 + net/netfilter/Makefile | 3 + net/netfilter/ipset/Kconfig | 26 + net/netfilter/ipset/Makefile | 8 + net/netfilter/ipset/ip_set_core.c | 1662 ++++++++++++++++++++++++ net/netfilter/ipset/ip_set_getport.c | 136 ++ net/netfilter/ipset/pfxlen.c | 291 +++++ 10 files changed, 2626 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set.h create mode 100644 include/linux/netfilter/ipset/ip_set_getport.h create mode 100644 include/linux/netfilter/ipset/pfxlen.h create mode 100644 net/netfilter/ipset/Kconfig create mode 100644 net/netfilter/ipset/Makefile create mode 100644 net/netfilter/ipset/ip_set_core.c create mode 100644 net/netfilter/ipset/ip_set_getport.c create mode 100644 net/netfilter/ipset/pfxlen.c diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h new file mode 100644 index 000000000000..ec333d83f3b4 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set.h @@ -0,0 +1,452 @@ +#ifndef _IP_SET_H +#define _IP_SET_H + +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* The protocol version */ +#define IPSET_PROTOCOL 6 + +/* The max length of strings including NUL: set and type identifiers */ +#define IPSET_MAXNAMELEN 32 + +/* Message types and commands */ +enum ipset_cmd { + IPSET_CMD_NONE, + IPSET_CMD_PROTOCOL, /* 1: Return protocol version */ + IPSET_CMD_CREATE, /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY, /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH, /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME, /* 5: Rename a set */ + IPSET_CMD_SWAP, /* 6: Swap two sets */ + IPSET_CMD_LIST, /* 7: List sets */ + IPSET_CMD_SAVE, /* 8: Save sets */ + IPSET_CMD_ADD, /* 9: Add an element to a set */ + IPSET_CMD_DEL, /* 10: Delete an element from a set */ + IPSET_CMD_TEST, /* 11: Test an element in a set */ + IPSET_CMD_HEADER, /* 12: Get set header data only */ + IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_MSG_MAX, /* Netlink message commands */ + + /* Commands in userspace: */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ + IPSET_CMD_HELP, /* 15: Get help */ + IPSET_CMD_VERSION, /* 16: Get program version */ + IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + + IPSET_CMD_MAX, + + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ +}; + +/* Attributes at command level */ +enum { + IPSET_ATTR_UNSPEC, + IPSET_ATTR_PROTOCOL, /* 1: Protocol version */ + IPSET_ATTR_SETNAME, /* 2: Name of the set */ + IPSET_ATTR_TYPENAME, /* 3: Typename */ + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* Setname at rename/swap */ + IPSET_ATTR_REVISION, /* 4: Settype revision */ + IPSET_ATTR_FAMILY, /* 5: Settype family */ + IPSET_ATTR_FLAGS, /* 6: Flags at command level */ + IPSET_ATTR_DATA, /* 7: Nested attributes */ + IPSET_ATTR_ADT, /* 8: Multiple data containers */ + IPSET_ATTR_LINENO, /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + __IPSET_ATTR_CMD_MAX, +}; +#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) + +/* CADT specific attributes */ +enum { + IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IP_FROM = IPSET_ATTR_IP, + IPSET_ATTR_IP_TO, /* 2 */ + IPSET_ATTR_CIDR, /* 3 */ + IPSET_ATTR_PORT, /* 4 */ + IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT, + IPSET_ATTR_PORT_TO, /* 5 */ + IPSET_ATTR_TIMEOUT, /* 6 */ + IPSET_ATTR_PROTO, /* 7 */ + IPSET_ATTR_CADT_FLAGS, /* 8 */ + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16, + /* Create-only specific attributes */ + IPSET_ATTR_GC, + IPSET_ATTR_HASHSIZE, + IPSET_ATTR_MAXELEM, + IPSET_ATTR_NETMASK, + IPSET_ATTR_PROBES, + IPSET_ATTR_RESIZE, + IPSET_ATTR_SIZE, + /* Kernel-only */ + IPSET_ATTR_ELEMENTS, + IPSET_ATTR_REFERENCES, + IPSET_ATTR_MEMSIZE, + + __IPSET_ATTR_CREATE_MAX, +}; +#define IPSET_ATTR_CREATE_MAX (__IPSET_ATTR_CREATE_MAX - 1) + +/* ADT specific attributes */ +enum { + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1, + IPSET_ATTR_NAME, + IPSET_ATTR_NAMEREF, + IPSET_ATTR_IP2, + IPSET_ATTR_CIDR2, + __IPSET_ATTR_ADT_MAX, +}; +#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) + +/* IP specific attributes */ +enum { + IPSET_ATTR_IPADDR_IPV4 = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IPADDR_IPV6, + __IPSET_ATTR_IPADDR_MAX, +}; +#define IPSET_ATTR_IPADDR_MAX (__IPSET_ATTR_IPADDR_MAX - 1) + +/* Error codes */ +enum ipset_errno { + IPSET_ERR_PRIVATE = 4096, + IPSET_ERR_PROTOCOL, + IPSET_ERR_FIND_TYPE, + IPSET_ERR_MAX_SETS, + IPSET_ERR_BUSY, + IPSET_ERR_EXIST_SETNAME2, + IPSET_ERR_TYPE_MISMATCH, + IPSET_ERR_EXIST, + IPSET_ERR_INVALID_CIDR, + IPSET_ERR_INVALID_NETMASK, + IPSET_ERR_INVALID_FAMILY, + IPSET_ERR_TIMEOUT, + IPSET_ERR_REFERENCED, + IPSET_ERR_IPADDR_IPV4, + IPSET_ERR_IPADDR_IPV6, + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352, +}; + +/* Flags at command level */ +enum ipset_cmd_flags { + IPSET_FLAG_BIT_EXIST = 0, + IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), +}; + +/* Flags at CADT attribute level */ +enum ipset_cadt_flags { + IPSET_FLAG_BIT_BEFORE = 0, + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), +}; + +/* Commands with settype-specific attributes */ +enum ipset_adt { + IPSET_ADD, + IPSET_DEL, + IPSET_TEST, + IPSET_ADT_MAX, + IPSET_CREATE = IPSET_ADT_MAX, + IPSET_CADT_MAX, +}; + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include + +/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t + * and IPSET_INVALID_ID if you want to increase the max number of sets. + */ +typedef u16 ip_set_id_t; + +#define IPSET_INVALID_ID 65535 + +enum ip_set_dim { + IPSET_DIM_ZERO = 0, + IPSET_DIM_ONE, + IPSET_DIM_TWO, + IPSET_DIM_THREE, + /* Max dimension in elements. + * If changed, new revision of iptables match/target is required. + */ + IPSET_DIM_MAX = 6, +}; + +/* Option flags for kernel operations */ +enum ip_set_kopt { + IPSET_INV_MATCH = (1 << IPSET_DIM_ZERO), + IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE), + IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO), + IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE), +}; + +/* Set features */ +enum ip_set_feature { + IPSET_TYPE_IP_FLAG = 0, + IPSET_TYPE_IP = (1 << IPSET_TYPE_IP_FLAG), + IPSET_TYPE_PORT_FLAG = 1, + IPSET_TYPE_PORT = (1 << IPSET_TYPE_PORT_FLAG), + IPSET_TYPE_MAC_FLAG = 2, + IPSET_TYPE_MAC = (1 << IPSET_TYPE_MAC_FLAG), + IPSET_TYPE_IP2_FLAG = 3, + IPSET_TYPE_IP2 = (1 << IPSET_TYPE_IP2_FLAG), + IPSET_TYPE_NAME_FLAG = 4, + IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), + /* Strictly speaking not a feature, but a flag for dumping: + * this settype must be dumped last */ + IPSET_DUMP_LAST_FLAG = 7, + IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), +}; + +struct ip_set; + +typedef int (*ipset_adtfn)(struct ip_set *set, void *value, u32 timeout); + +/* Set type, variant-specific part */ +struct ip_set_type_variant { + /* Kernelspace: test/add/del entries + * returns negative error code, + * zero for no match/success to add/delete + * positive for matching element */ + int (*kadt)(struct ip_set *set, const struct sk_buff * skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags); + + /* Userspace: test/add/del entries + * returns negative error code, + * zero for no match/success to add/delete + * positive for matching element */ + int (*uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags); + + /* Low level add/del/test functions */ + ipset_adtfn adt[IPSET_ADT_MAX]; + + /* When adding entries and set is full, try to resize the set */ + int (*resize)(struct ip_set *set, bool retried); + /* Destroy the set */ + void (*destroy)(struct ip_set *set); + /* Flush the elements */ + void (*flush)(struct ip_set *set); + /* Expire entries before listing */ + void (*expire)(struct ip_set *set); + /* List set header data */ + int (*head)(struct ip_set *set, struct sk_buff *skb); + /* List elements */ + int (*list)(const struct ip_set *set, struct sk_buff *skb, + struct netlink_callback *cb); + + /* Return true if "b" set is the same as "a" + * according to the create set parameters */ + bool (*same_set)(const struct ip_set *a, const struct ip_set *b); +}; + +/* The core set type structure */ +struct ip_set_type { + struct list_head list; + + /* Typename */ + char name[IPSET_MAXNAMELEN]; + /* Protocol version */ + u8 protocol; + /* Set features to control swapping */ + u8 features; + /* Set type dimension */ + u8 dimension; + /* Supported family: may be AF_UNSPEC for both AF_INET/AF_INET6 */ + u8 family; + /* Type revision */ + u8 revision; + + /* Create set */ + int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags); + + /* Attribute policies */ + const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1]; + const struct nla_policy adt_policy[IPSET_ATTR_ADT_MAX + 1]; + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +/* register and unregister set type */ +extern int ip_set_type_register(struct ip_set_type *set_type); +extern void ip_set_type_unregister(struct ip_set_type *set_type); + +/* A generic IP set */ +struct ip_set { + /* The name of the set */ + char name[IPSET_MAXNAMELEN]; + /* Lock protecting the set data */ + rwlock_t lock; + /* References to the set */ + atomic_t ref; + /* The core set type */ + struct ip_set_type *type; + /* The type variant doing the real job */ + const struct ip_set_type_variant *variant; + /* The actual INET family of the set */ + u8 family; + /* The type specific data */ + void *data; +}; + +/* register and unregister set references */ +extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set); +extern void ip_set_put_byindex(ip_set_id_t index); +extern const char * ip_set_name_byindex(ip_set_id_t index); +extern ip_set_id_t ip_set_nfnl_get(const char *name); +extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index); +extern void ip_set_nfnl_put(ip_set_id_t index); + +/* API for iptables set match, and SET target */ +extern int ip_set_add(ip_set_id_t id, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags); +extern int ip_set_del(ip_set_id_t id, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags); +extern int ip_set_test(ip_set_id_t id, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags); + +/* Utility functions */ +extern void * ip_set_alloc(size_t size); +extern void ip_set_free(void *members); +extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); +extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); + +static inline int +ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) +{ + __be32 ip; + int ret = ip_set_get_ipaddr4(nla, &ip); + + if (ret) + return ret; + *ipaddr = ntohl(ip); + return 0; +} + +/* Ignore IPSET_ERR_EXIST errors if asked to do so? */ +static inline bool +ip_set_eexist(int ret, u32 flags) +{ + return ret == -IPSET_ERR_EXIST && (flags & IPSET_FLAG_EXIST); +} + +/* Check the NLA_F_NET_BYTEORDER flag */ +static inline bool +ip_set_attr_netorder(struct nlattr *tb[], int type) +{ + return tb[type] && (tb[type]->nla_type & NLA_F_NET_BYTEORDER); +} + +static inline bool +ip_set_optattr_netorder(struct nlattr *tb[], int type) +{ + return !tb[type] || (tb[type]->nla_type & NLA_F_NET_BYTEORDER); +} + +/* Useful converters */ +static inline u32 +ip_set_get_h32(const struct nlattr *attr) +{ + return ntohl(nla_get_be32(attr)); +} + +static inline u16 +ip_set_get_h16(const struct nlattr *attr) +{ + return ntohs(nla_get_be16(attr)); +} + +#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED) +#define ipset_nest_end(skb, start) nla_nest_end(skb, start) + +#define NLA_PUT_IPADDR4(skb, type, ipaddr) \ +do { \ + struct nlattr *__nested = ipset_nest_start(skb, type); \ + \ + if (!__nested) \ + goto nla_put_failure; \ + NLA_PUT_NET32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); \ + ipset_nest_end(skb, __nested); \ +} while (0) + +#define NLA_PUT_IPADDR6(skb, type, ipaddrptr) \ +do { \ + struct nlattr *__nested = ipset_nest_start(skb, type); \ + \ + if (!__nested) \ + goto nla_put_failure; \ + NLA_PUT(skb, IPSET_ATTR_IPADDR_IPV6, \ + sizeof(struct in6_addr), ipaddrptr); \ + ipset_nest_end(skb, __nested); \ +} while (0) + +/* Get address from skbuff */ +static inline __be32 +ip4addr(const struct sk_buff *skb, bool src) +{ + return src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr; +} + +static inline void +ip4addrptr(const struct sk_buff *skb, bool src, __be32 *addr) +{ + *addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr; +} + +static inline void +ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) +{ + memcpy(addr, src ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr, + sizeof(*addr)); +} + +/* Calculate the bytes required to store the inclusive range of a-b */ +static inline int +bitmap_bytes(u32 a, u32 b) +{ + return 4 * ((((b - a + 8) / 8) + 3) / 4); +} + +/* Interface to iptables/ip6tables */ + +#define SO_IP_SET 83 + +union ip_set_name_index { + char name[IPSET_MAXNAMELEN]; + ip_set_id_t index; +}; + +#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ +struct ip_set_req_get_set { + unsigned op; + unsigned version; + union ip_set_name_index set; +}; + +#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ +/* Uses ip_set_req_get_set */ + +#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ +struct ip_set_req_version { + unsigned op; + unsigned version; +}; + +#endif /* __KERNEL__ */ + +#endif /*_IP_SET_H */ diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h new file mode 100644 index 000000000000..694c433298b8 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -0,0 +1,11 @@ +#ifndef _IP_SET_GETPORT_H +#define _IP_SET_GETPORT_H + +extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto); +extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto); +extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, + __be16 *port); + +#endif /*_IP_SET_GETPORT_H*/ diff --git a/include/linux/netfilter/ipset/pfxlen.h b/include/linux/netfilter/ipset/pfxlen.h new file mode 100644 index 000000000000..0e1fb50da562 --- /dev/null +++ b/include/linux/netfilter/ipset/pfxlen.h @@ -0,0 +1,35 @@ +#ifndef _PFXLEN_H +#define _PFXLEN_H + +#include +#include + +/* Prefixlen maps, by Jan Engelhardt */ +extern const union nf_inet_addr ip_set_netmask_map[]; +extern const union nf_inet_addr ip_set_hostmask_map[]; + +static inline __be32 +ip_set_netmask(u8 pfxlen) +{ + return ip_set_netmask_map[pfxlen].ip; +} + +static inline const __be32 * +ip_set_netmask6(u8 pfxlen) +{ + return &ip_set_netmask_map[pfxlen].ip6[0]; +} + +static inline u32 +ip_set_hostmask(u8 pfxlen) +{ + return (__force u32) ip_set_hostmask_map[pfxlen].ip; +} + +static inline const __be32 * +ip_set_hostmask6(u8 pfxlen) +{ + return &ip_set_hostmask_map[pfxlen].ip6[0]; +} + +#endif /*_PFXLEN_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index faf7412ea453..351abf8ace13 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1052,4 +1052,6 @@ endif # NETFILTER_XTABLES endmenu +source "net/netfilter/ipset/Kconfig" + source "net/netfilter/ipvs/Kconfig" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 9ae6878a85b1..510b586ccb7f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -105,5 +105,8 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o +# ipset +obj-$(CONFIG_IP_SET) += ipset/ + # IPVS obj-$(CONFIG_IP_VS) += ipvs/ diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig new file mode 100644 index 000000000000..5ade156dd470 --- /dev/null +++ b/net/netfilter/ipset/Kconfig @@ -0,0 +1,26 @@ +menuconfig IP_SET + tristate "IP set support" + depends on INET && NETFILTER + help + This option adds IP set support to the kernel. + In order to define and use the sets, you need the userspace utility + ipset(8). You can use the sets in netfilter via the "set" match + and "SET" target. + + To compile it as a module, choose M here. If unsure, say N. + +if IP_SET + +config IP_SET_MAX + int "Maximum number of IP sets" + default 256 + range 2 65534 + depends on IP_SET + help + You can define here default value of the maximum number + of IP sets for the kernel. + + The value can be overriden by the 'max_sets' module + parameter of the 'ip_set' module. + +endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile new file mode 100644 index 000000000000..910cd425c067 --- /dev/null +++ b/net/netfilter/ipset/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the ipset modules +# + +ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o + +# ipset core +obj-$(CONFIG_IP_SET) += ip_set.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c new file mode 100644 index 000000000000..8a736247e85f --- /dev/null +++ b/net/netfilter/ipset/ip_set_core.c @@ -0,0 +1,1662 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module for IP set management */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static LIST_HEAD(ip_set_type_list); /* all registered set types */ +static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ + +static struct ip_set **ip_set_list; /* all individual sets */ +static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ + +#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) + +static unsigned int max_sets; + +module_param(max_sets, int, 0600); +MODULE_PARM_DESC(max_sets, "maximal number of sets"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("core IP set support"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); + +/* + * The set types are implemented in modules and registered set types + * can be found in ip_set_type_list. Adding/deleting types is + * serialized by ip_set_type_mutex. + */ + +static inline void +ip_set_type_lock(void) +{ + mutex_lock(&ip_set_type_mutex); +} + +static inline void +ip_set_type_unlock(void) +{ + mutex_unlock(&ip_set_type_mutex); +} + +/* Register and deregister settype */ + +static struct ip_set_type * +find_set_type(const char *name, u8 family, u8 revision) +{ + struct ip_set_type *type; + + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name) && + (type->family == family || type->family == AF_UNSPEC) && + type->revision == revision) + return type; + return NULL; +} + +/* Unlock, try to load a set type module and lock again */ +static int +try_to_load_type(const char *name) +{ + nfnl_unlock(); + pr_debug("try to load ip_set_%s\n", name); + if (request_module("ip_set_%s", name) < 0) { + pr_warning("Can't find ip_set type %s\n", name); + nfnl_lock(); + return -IPSET_ERR_FIND_TYPE; + } + nfnl_lock(); + return -EAGAIN; +} + +/* Find a set type and reference it */ +static int +find_set_type_get(const char *name, u8 family, u8 revision, + struct ip_set_type **found) +{ + rcu_read_lock(); + *found = find_set_type(name, family, revision); + if (*found) { + int err = !try_module_get((*found)->me); + rcu_read_unlock(); + return err ? -EFAULT : 0; + } + rcu_read_unlock(); + + return try_to_load_type(name); +} + +/* Find a given set type by name and family. + * If we succeeded, the supported minimal and maximum revisions are + * filled out. + */ +static int +find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) +{ + struct ip_set_type *type; + bool found = false; + + *min = *max = 0; + rcu_read_lock(); + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name) && + (type->family == family || type->family == AF_UNSPEC)) { + found = true; + if (type->revision < *min) + *min = type->revision; + else if (type->revision > *max) + *max = type->revision; + } + rcu_read_unlock(); + if (found) + return 0; + + return try_to_load_type(name); +} + +#define family_name(f) ((f) == AF_INET ? "inet" : \ + (f) == AF_INET6 ? "inet6" : "any") + +/* Register a set type structure. The type is identified by + * the unique triple of name, family and revision. + */ +int +ip_set_type_register(struct ip_set_type *type) +{ + int ret = 0; + + if (type->protocol != IPSET_PROTOCOL) { + pr_warning("ip_set type %s, family %s, revision %u uses " + "wrong protocol version %u (want %u)\n", + type->name, family_name(type->family), + type->revision, type->protocol, IPSET_PROTOCOL); + return -EINVAL; + } + + ip_set_type_lock(); + if (find_set_type(type->name, type->family, type->revision)) { + /* Duplicate! */ + pr_warning("ip_set type %s, family %s, revision %u " + "already registered!\n", type->name, + family_name(type->family), type->revision); + ret = -EINVAL; + goto unlock; + } + list_add_rcu(&type->list, &ip_set_type_list); + pr_debug("type %s, family %s, revision %u registered.\n", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(ip_set_type_register); + +/* Unregister a set type. There's a small race with ip_set_create */ +void +ip_set_type_unregister(struct ip_set_type *type) +{ + ip_set_type_lock(); + if (!find_set_type(type->name, type->family, type->revision)) { + pr_warning("ip_set type %s, family %s, revision %u " + "not registered\n", type->name, + family_name(type->family), type->revision); + goto unlock; + } + list_del_rcu(&type->list); + pr_debug("type %s, family %s, revision %u unregistered.\n", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_unlock(); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(ip_set_type_unregister); + +/* Utility functions */ +void * +ip_set_alloc(size_t size) +{ + void *members = NULL; + + if (size < KMALLOC_MAX_SIZE) + members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + + if (members) { + pr_debug("%p: allocated with kmalloc\n", members); + return members; + } + + members = vzalloc(size); + if (!members) + return NULL; + pr_debug("%p: allocated with vmalloc\n", members); + + return members; +} +EXPORT_SYMBOL_GPL(ip_set_alloc); + +void +ip_set_free(void *members) +{ + pr_debug("%p: free with %s\n", members, + is_vmalloc_addr(members) ? "vfree" : "kfree"); + if (is_vmalloc_addr(members)) + vfree(members); + else + kfree(members); +} +EXPORT_SYMBOL_GPL(ip_set_free); + +static inline bool +flag_nested(const struct nlattr *nla) +{ + return nla->nla_type & NLA_F_NESTED; +} + +static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { + [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, + [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, +}; + +int +ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) +{ + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + + if (unlikely(!flag_nested(nla))) + return -IPSET_ERR_PROTOCOL; + if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), + ipaddr_policy)) + return -IPSET_ERR_PROTOCOL; + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) + return -IPSET_ERR_PROTOCOL; + + *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); + +int +ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) +{ + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + + if (unlikely(!flag_nested(nla))) + return -IPSET_ERR_PROTOCOL; + + if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), + ipaddr_policy)) + return -IPSET_ERR_PROTOCOL; + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) + return -IPSET_ERR_PROTOCOL; + + memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), + sizeof(struct in6_addr)); + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); + +/* + * Creating/destroying/renaming/swapping affect the existence and + * the properties of a set. All of these can be executed from userspace + * only and serialized by the nfnl mutex indirectly from nfnetlink. + * + * Sets are identified by their index in ip_set_list and the index + * is used by the external references (set/SET netfilter modules). + * + * The set behind an index may change by swapping only, from userspace. + */ + +static inline void +__ip_set_get(ip_set_id_t index) +{ + atomic_inc(&ip_set_list[index]->ref); +} + +static inline void +__ip_set_put(ip_set_id_t index) +{ + atomic_dec(&ip_set_list[index]->ref); +} + +/* + * Add, del and test set entries from kernel. + * + * The set behind the index must exist and must be referenced + * so it can't be destroyed (or changed) under our foot. + */ + +int +ip_set_test(ip_set_id_t index, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags) +{ + struct ip_set *set = ip_set_list[index]; + int ret = 0; + + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u\n", set->name, index); + + if (dim < set->type->dimension || + !(family == set->family || set->family == AF_UNSPEC)) + return 0; + + read_lock_bh(&set->lock); + ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags); + read_unlock_bh(&set->lock); + + if (ret == -EAGAIN) { + /* Type requests element to be completed */ + pr_debug("element must be competed, ADD is triggered\n"); + write_lock_bh(&set->lock); + set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); + write_unlock_bh(&set->lock); + ret = 1; + } + + /* Convert error codes to nomatch */ + return (ret < 0 ? 0 : ret); +} +EXPORT_SYMBOL_GPL(ip_set_test); + +int +ip_set_add(ip_set_id_t index, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags) +{ + struct ip_set *set = ip_set_list[index]; + int ret; + + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u\n", set->name, index); + + if (dim < set->type->dimension || + !(family == set->family || set->family == AF_UNSPEC)) + return 0; + + write_lock_bh(&set->lock); + ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); + write_unlock_bh(&set->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(ip_set_add); + +int +ip_set_del(ip_set_id_t index, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags) +{ + struct ip_set *set = ip_set_list[index]; + int ret = 0; + + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u\n", set->name, index); + + if (dim < set->type->dimension || + !(family == set->family || set->family == AF_UNSPEC)) + return 0; + + write_lock_bh(&set->lock); + ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags); + write_unlock_bh(&set->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(ip_set_del); + +/* + * Find set by name, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex must already be activated. + */ +ip_set_id_t +ip_set_get_byname(const char *name, struct ip_set **set) +{ + ip_set_id_t i, index = IPSET_INVALID_ID; + struct ip_set *s; + + for (i = 0; i < ip_set_max; i++) { + s = ip_set_list[i]; + if (s != NULL && STREQ(s->name, name)) { + __ip_set_get(i); + index = i; + *set = s; + } + } + + return index; +} +EXPORT_SYMBOL_GPL(ip_set_get_byname); + +/* + * If the given set pointer points to a valid set, decrement + * reference count by 1. The caller shall not assume the index + * to be valid, after calling this function. + * + * The nfnl mutex must already be activated. + */ +void +ip_set_put_byindex(ip_set_id_t index) +{ + if (ip_set_list[index] != NULL) { + BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + __ip_set_put(index); + } +} +EXPORT_SYMBOL_GPL(ip_set_put_byindex); + +/* + * Get the name of a set behind a set index. + * We assume the set is referenced, so it does exist and + * can't be destroyed. The set cannot be renamed due to + * the referencing either. + * + * The nfnl mutex must already be activated. + */ +const char * +ip_set_name_byindex(ip_set_id_t index) +{ + const struct ip_set *set = ip_set_list[index]; + + BUG_ON(set == NULL); + BUG_ON(atomic_read(&set->ref) == 0); + + /* Referenced, so it's safe */ + return set->name; +} +EXPORT_SYMBOL_GPL(ip_set_name_byindex); + +/* + * Routines to call by external subsystems, which do not + * call nfnl_lock for us. + */ + +/* + * Find set by name, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex is used in the function. + */ +ip_set_id_t +ip_set_nfnl_get(const char *name) +{ + struct ip_set *s; + ip_set_id_t index; + + nfnl_lock(); + index = ip_set_get_byname(name, &s); + nfnl_unlock(); + + return index; +} +EXPORT_SYMBOL_GPL(ip_set_nfnl_get); + +/* + * Find set by index, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex is used in the function. + */ +ip_set_id_t +ip_set_nfnl_get_byindex(ip_set_id_t index) +{ + if (index > ip_set_max) + return IPSET_INVALID_ID; + + nfnl_lock(); + if (ip_set_list[index]) + __ip_set_get(index); + else + index = IPSET_INVALID_ID; + nfnl_unlock(); + + return index; +} +EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); + +/* + * If the given set pointer points to a valid set, decrement + * reference count by 1. The caller shall not assume the index + * to be valid, after calling this function. + * + * The nfnl mutex is used in the function. + */ +void +ip_set_nfnl_put(ip_set_id_t index) +{ + nfnl_lock(); + if (ip_set_list[index] != NULL) { + BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + __ip_set_put(index); + } + nfnl_unlock(); +} +EXPORT_SYMBOL_GPL(ip_set_nfnl_put); + +/* + * Communication protocol with userspace over netlink. + * + * We already locked by nfnl_lock. + */ + +static inline bool +protocol_failed(const struct nlattr * const tb[]) +{ + return !tb[IPSET_ATTR_PROTOCOL] || + nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; +} + +static inline u32 +flag_exist(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; +} + +static struct nlmsghdr * +start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, + enum ipset_cmd cmd) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + sizeof(*nfmsg), flags); + if (nlh == NULL) + return NULL; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + return nlh; +} + +/* Create a set */ + +static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1}, + [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, + [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, + [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, +}; + +static ip_set_id_t +find_set_id(const char *name) +{ + ip_set_id_t i, index = IPSET_INVALID_ID; + const struct ip_set *set; + + for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { + set = ip_set_list[i]; + if (set != NULL && STREQ(set->name, name)) + index = i; + } + return index; +} + +static inline struct ip_set * +find_set(const char *name) +{ + ip_set_id_t index = find_set_id(name); + + return index == IPSET_INVALID_ID ? NULL : ip_set_list[index]; +} + +static int +find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +{ + ip_set_id_t i; + + *index = IPSET_INVALID_ID; + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] == NULL) { + if (*index == IPSET_INVALID_ID) + *index = i; + } else if (STREQ(name, ip_set_list[i]->name)) { + /* Name clash */ + *set = ip_set_list[i]; + return -EEXIST; + } + } + if (*index == IPSET_INVALID_ID) + /* No free slot remained */ + return -IPSET_ERR_MAX_SETS; + return 0; +} + +static int +ip_set_create(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set, *clash; + ip_set_id_t index = IPSET_INVALID_ID; + struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; + const char *name, *typename; + u8 family, revision; + u32 flags = flag_exist(nlh); + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_TYPENAME] == NULL || + attr[IPSET_ATTR_REVISION] == NULL || + attr[IPSET_ATTR_FAMILY] == NULL || + (attr[IPSET_ATTR_DATA] != NULL && + !flag_nested(attr[IPSET_ATTR_DATA])))) + return -IPSET_ERR_PROTOCOL; + + name = nla_data(attr[IPSET_ATTR_SETNAME]); + typename = nla_data(attr[IPSET_ATTR_TYPENAME]); + family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); + revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); + pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", + name, typename, family_name(family), revision); + + /* + * First, and without any locks, allocate and initialize + * a normal base set structure. + */ + set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); + if (!set) + return -ENOMEM; + rwlock_init(&set->lock); + strlcpy(set->name, name, IPSET_MAXNAMELEN); + atomic_set(&set->ref, 0); + set->family = family; + + /* + * Next, check that we know the type, and take + * a reference on the type, to make sure it stays available + * while constructing our new set. + * + * After referencing the type, we try to create the type + * specific part of the set without holding any locks. + */ + ret = find_set_type_get(typename, family, revision, &(set->type)); + if (ret) + goto out; + + /* + * Without holding any locks, create private part. + */ + if (attr[IPSET_ATTR_DATA] && + nla_parse(tb, IPSET_ATTR_CREATE_MAX, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + set->type->create_policy)) { + ret = -IPSET_ERR_PROTOCOL; + goto put_out; + } + + ret = set->type->create(set, tb, flags); + if (ret != 0) + goto put_out; + + /* BTW, ret==0 here. */ + + /* + * Here, we have a valid, constructed set and we are protected + * by nfnl_lock. Find the first free index in ip_set_list and + * check clashing. + */ + if ((ret = find_free_id(set->name, &index, &clash)) != 0) { + /* If this is the same set and requested, ignore error */ + if (ret == -EEXIST && + (flags & IPSET_FLAG_EXIST) && + STREQ(set->type->name, clash->type->name) && + set->type->family == clash->type->family && + set->type->revision == clash->type->revision && + set->variant->same_set(set, clash)) + ret = 0; + goto cleanup; + } + + /* + * Finally! Add our shiny new set to the list, and be done. + */ + pr_debug("create: '%s' created with index %u!\n", set->name, index); + ip_set_list[index] = set; + + return ret; + +cleanup: + set->variant->destroy(set); +put_out: + module_put(set->type->me); +out: + kfree(set); + return ret; +} + +/* Destroy sets */ + +static const struct nla_policy +ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, +}; + +static void +ip_set_destroy_set(ip_set_id_t index) +{ + struct ip_set *set = ip_set_list[index]; + + pr_debug("set: %s\n", set->name); + ip_set_list[index] = NULL; + + /* Must call it without holding any lock */ + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); +} + +static int +ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + ip_set_id_t i; + + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; + + /* References are protected by the nfnl mutex */ + if (!attr[IPSET_ATTR_SETNAME]) { + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL && + (atomic_read(&ip_set_list[i]->ref))) + return -IPSET_ERR_BUSY; + } + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL) + ip_set_destroy_set(i); + } + } else { + i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (i == IPSET_INVALID_ID) + return -ENOENT; + else if (atomic_read(&ip_set_list[i]->ref)) + return -IPSET_ERR_BUSY; + + ip_set_destroy_set(i); + } + return 0; +} + +/* Flush sets */ + +static void +ip_set_flush_set(struct ip_set *set) +{ + pr_debug("set: %s\n", set->name); + + write_lock_bh(&set->lock); + set->variant->flush(set); + write_unlock_bh(&set->lock); +} + +static int +ip_set_flush(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + ip_set_id_t i; + + if (unlikely(protocol_failed(attr))) + return -EPROTO; + + if (!attr[IPSET_ATTR_SETNAME]) { + for (i = 0; i < ip_set_max; i++) + if (ip_set_list[i] != NULL) + ip_set_flush_set(ip_set_list[i]); + } else { + i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (i == IPSET_INVALID_ID) + return -ENOENT; + + ip_set_flush_set(ip_set_list[i]); + } + + return 0; +} + +/* Rename a set */ + +static const struct nla_policy +ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, +}; + +static int +ip_set_rename(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + const char *name2; + ip_set_id_t i; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_SETNAME2] == NULL)) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + if (atomic_read(&set->ref) != 0) + return -IPSET_ERR_REFERENCED; + + name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL && + STREQ(ip_set_list[i]->name, name2)) + return -IPSET_ERR_EXIST_SETNAME2; + } + strncpy(set->name, name2, IPSET_MAXNAMELEN); + + return 0; +} + +/* Swap two sets so that name/index points to the other. + * References and set names are also swapped. + * + * We are protected by the nfnl mutex and references are + * manipulated only by holding the mutex. The kernel interfaces + * do not hold the mutex but the pointer settings are atomic + * so the ip_set_list always contains valid pointers to the sets. + */ + +static int +ip_set_swap(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *from, *to; + ip_set_id_t from_id, to_id; + char from_name[IPSET_MAXNAMELEN]; + u32 from_ref; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_SETNAME2] == NULL)) + return -IPSET_ERR_PROTOCOL; + + from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (from_id == IPSET_INVALID_ID) + return -ENOENT; + + to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); + if (to_id == IPSET_INVALID_ID) + return -IPSET_ERR_EXIST_SETNAME2; + + from = ip_set_list[from_id]; + to = ip_set_list[to_id]; + + /* Features must not change. + * Not an artifical restriction anymore, as we must prevent + * possible loops created by swapping in setlist type of sets. */ + if (!(from->type->features == to->type->features && + from->type->family == to->type->family)) + return -IPSET_ERR_TYPE_MISMATCH; + + /* No magic here: ref munging protected by the nfnl_lock */ + strncpy(from_name, from->name, IPSET_MAXNAMELEN); + from_ref = atomic_read(&from->ref); + + strncpy(from->name, to->name, IPSET_MAXNAMELEN); + atomic_set(&from->ref, atomic_read(&to->ref)); + strncpy(to->name, from_name, IPSET_MAXNAMELEN); + atomic_set(&to->ref, from_ref); + + ip_set_list[from_id] = to; + ip_set_list[to_id] = from; + + return 0; +} + +/* List/save set data */ + +#define DUMP_INIT 0L +#define DUMP_ALL 1L +#define DUMP_ONE 2L +#define DUMP_LAST 3L + +static int +ip_set_dump_done(struct netlink_callback *cb) +{ + if (cb->args[2]) { + pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); + __ip_set_put((ip_set_id_t) cb->args[1]); + } + return 0; +} + +static inline void +dump_attrs(struct nlmsghdr *nlh) +{ + const struct nlattr *attr; + int rem; + + pr_debug("dump nlmsg\n"); + nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { + pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len); + } +} + +static int +dump_init(struct netlink_callback *cb) +{ + struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); + int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *attr = (void *)nlh + min_len; + ip_set_id_t index; + + /* Second pass, so parser can't fail */ + nla_parse(cda, IPSET_ATTR_CMD_MAX, + attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); + + /* cb->args[0] : dump single set/all sets + * [1] : set index + * [..]: type specific + */ + + if (!cda[IPSET_ATTR_SETNAME]) { + cb->args[0] = DUMP_ALL; + return 0; + } + + index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) + return -ENOENT; + + cb->args[0] = DUMP_ONE; + cb->args[1] = index; + return 0; +} + +static int +ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +{ + ip_set_id_t index = IPSET_INVALID_ID, max; + struct ip_set *set = NULL; + struct nlmsghdr *nlh = NULL; + unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + int ret = 0; + + if (cb->args[0] == DUMP_INIT) { + ret = dump_init(cb); + if (ret < 0) { + nlh = nlmsg_hdr(cb->skb); + /* We have to create and send the error message + * manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(cb->skb, nlh, ret); + return ret; + } + } + + if (cb->args[1] >= ip_set_max) + goto out; + + pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); + max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + for (; cb->args[1] < max; cb->args[1]++) { + index = (ip_set_id_t) cb->args[1]; + set = ip_set_list[index]; + if (set == NULL) { + if (cb->args[0] == DUMP_ONE) { + ret = -ENOENT; + goto out; + } + continue; + } + /* When dumping all sets, we must dump "sorted" + * so that lists (unions of sets) are dumped last. + */ + if (cb->args[0] != DUMP_ONE && + !((cb->args[0] == DUMP_ALL) ^ + (set->type->features & IPSET_DUMP_LAST))) + continue; + pr_debug("List set: %s\n", set->name); + if (!cb->args[2]) { + /* Start listing: make sure set won't be destroyed */ + pr_debug("reference set\n"); + __ip_set_get(index); + } + nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, flags, + IPSET_CMD_LIST); + if (!nlh) { + ret = -EMSGSIZE; + goto release_refcount; + } + NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); + switch (cb->args[2]) { + case 0: + /* Core header data */ + NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, + set->type->name); + NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, + set->family); + NLA_PUT_U8(skb, IPSET_ATTR_REVISION, + set->type->revision); + ret = set->variant->head(set, skb); + if (ret < 0) + goto release_refcount; + /* Fall through and add elements */ + default: + read_lock_bh(&set->lock); + ret = set->variant->list(set, skb, cb); + read_unlock_bh(&set->lock); + if (!cb->args[2]) { + /* Set is done, proceed with next one */ + if (cb->args[0] == DUMP_ONE) + cb->args[1] = IPSET_INVALID_ID; + else + cb->args[1]++; + } + goto release_refcount; + } + } + goto out; + +nla_put_failure: + ret = -EFAULT; +release_refcount: + /* If there was an error or set is done, release set */ + if (ret || !cb->args[2]) { + pr_debug("release set %s\n", ip_set_list[index]->name); + __ip_set_put(index); + } + + /* If we dump all sets, continue with dumping last ones */ + if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2]) + cb->args[0] = DUMP_LAST; + +out: + if (nlh) { + nlmsg_end(skb, nlh); + pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); + dump_attrs(nlh); + } + + return ret < 0 ? ret : skb->len; +} + +static int +ip_set_dump(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; + + return netlink_dump_start(ctnl, skb, nlh, + ip_set_dump_start, + ip_set_dump_done); +} + +/* Add, del and test */ + +static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, + [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, +}; + +static int +call_ad(struct sk_buff *skb, struct ip_set *set, + struct nlattr *tb[], enum ipset_adt adt, + u32 flags, bool use_lineno) +{ + int ret, retried = 0; + u32 lineno = 0; + bool eexist = flags & IPSET_FLAG_EXIST; + + do { + write_lock_bh(&set->lock); + ret = set->variant->uadt(set, tb, adt, &lineno, flags); + write_unlock_bh(&set->lock); + } while (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried++)) == 0); + + if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) + return 0; + if (lineno && use_lineno) { + /* Error in restore/batch mode: send back lineno */ + struct nlmsghdr *nlh = nlmsg_hdr(skb); + int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cmdattr = (void *)nlh + min_len; + u32 *errline; + + nla_parse(cda, IPSET_ATTR_CMD_MAX, + cmdattr, nlh->nlmsg_len - min_len, + ip_set_adt_policy); + + errline = nla_data(cda[IPSET_ATTR_LINENO]); + + *errline = lineno; + } + + return ret; +} + +static int +ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + const struct nlattr *nla; + u32 flags = flag_exist(nlh); + bool use_lineno; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + !((attr[IPSET_ATTR_DATA] != NULL) ^ + (attr[IPSET_ATTR_ADT] != NULL)) || + (attr[IPSET_ATTR_DATA] != NULL && + !flag_nested(attr[IPSET_ATTR_DATA])) || + (attr[IPSET_ATTR_ADT] != NULL && + (!flag_nested(attr[IPSET_ATTR_ADT]) || + attr[IPSET_ATTR_LINENO] == NULL)))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + + use_lineno = !!attr[IPSET_ATTR_LINENO]; + if (attr[IPSET_ATTR_DATA]) { + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); + } else { + int nla_rem; + + nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { + memset(tb, 0, sizeof(tb)); + if (nla_type(nla) != IPSET_ATTR_DATA || + !flag_nested(nla) || + nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(nla), nla_len(nla), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(skb, set, tb, IPSET_ADD, + flags, use_lineno); + if (ret < 0) + return ret; + } + } + return ret; +} + +static int +ip_set_udel(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + const struct nlattr *nla; + u32 flags = flag_exist(nlh); + bool use_lineno; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + !((attr[IPSET_ATTR_DATA] != NULL) ^ + (attr[IPSET_ATTR_ADT] != NULL)) || + (attr[IPSET_ATTR_DATA] != NULL && + !flag_nested(attr[IPSET_ATTR_DATA])) || + (attr[IPSET_ATTR_ADT] != NULL && + (!flag_nested(attr[IPSET_ATTR_ADT]) || + attr[IPSET_ATTR_LINENO] == NULL)))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + + use_lineno = !!attr[IPSET_ATTR_LINENO]; + if (attr[IPSET_ATTR_DATA]) { + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); + } else { + int nla_rem; + + nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { + memset(tb, 0, sizeof(*tb)); + if (nla_type(nla) != IPSET_ATTR_DATA || + !flag_nested(nla) || + nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(nla), nla_len(nla), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(skb, set, tb, IPSET_DEL, + flags, use_lineno); + if (ret < 0) + return ret; + } + } + return ret; +} + +static int +ip_set_utest(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_DATA] == NULL || + !flag_nested(attr[IPSET_ATTR_DATA]))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + + read_lock_bh(&set->lock); + ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0); + read_unlock_bh(&set->lock); + /* Userspace can't trigger element to be re-added */ + if (ret == -EAGAIN) + ret = 1; + + return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; +} + +/* Get headed data of a set */ + +static int +ip_set_header(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + const struct ip_set *set; + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t index; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL)) + return -IPSET_ERR_PROTOCOL; + + index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) + return -ENOENT; + set = ip_set_list[index]; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_HEADER); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); + NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision); + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get type data */ + +static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, +}; + +static int +ip_set_type(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + u8 family, min, max; + const char *typename; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_TYPENAME] == NULL || + attr[IPSET_ATTR_FAMILY] == NULL)) + return -IPSET_ERR_PROTOCOL; + + family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); + typename = nla_data(attr[IPSET_ATTR_TYPENAME]); + ret = find_set_type_minmax(typename, family, &min, &max); + if (ret) + return ret; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_TYPE); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); + nlmsg_end(skb2, nlh2); + + pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get protocol version */ + +static const struct nla_policy +ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, +}; + +static int +ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + int ret = 0; + + if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + return -IPSET_ERR_PROTOCOL; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_PROTOCOL); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { + [IPSET_CMD_CREATE] = { + .call = ip_set_create, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_create_policy, + }, + [IPSET_CMD_DESTROY] = { + .call = ip_set_destroy, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_FLUSH] = { + .call = ip_set_flush, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_RENAME] = { + .call = ip_set_rename, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname2_policy, + }, + [IPSET_CMD_SWAP] = { + .call = ip_set_swap, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname2_policy, + }, + [IPSET_CMD_LIST] = { + .call = ip_set_dump, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_SAVE] = { + .call = ip_set_dump, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_ADD] = { + .call = ip_set_uadd, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_DEL] = { + .call = ip_set_udel, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_TEST] = { + .call = ip_set_utest, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_HEADER] = { + .call = ip_set_header, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_TYPE] = { + .call = ip_set_type, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_type_policy, + }, + [IPSET_CMD_PROTOCOL] = { + .call = ip_set_protocol, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_protocol_policy, + }, +}; + +static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { + .name = "ip_set", + .subsys_id = NFNL_SUBSYS_IPSET, + .cb_count = IPSET_MSG_MAX, + .cb = ip_set_netlink_subsys_cb, +}; + +/* Interface to iptables/ip6tables */ + +static int +ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) +{ + unsigned *op; + void *data; + int copylen = *len, ret = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optval != SO_IP_SET) + return -EBADF; + if (*len < sizeof(unsigned)) + return -EINVAL; + + data = vmalloc(*len); + if (!data) + return -ENOMEM; + if (copy_from_user(data, user, *len) != 0) { + ret = -EFAULT; + goto done; + } + op = (unsigned *) data; + + if (*op < IP_SET_OP_VERSION) { + /* Check the version at the beginning of operations */ + struct ip_set_req_version *req_version = data; + if (req_version->version != IPSET_PROTOCOL) { + ret = -EPROTO; + goto done; + } + } + + switch (*op) { + case IP_SET_OP_VERSION: { + struct ip_set_req_version *req_version = data; + + if (*len != sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + + req_version->version = IPSET_PROTOCOL; + ret = copy_to_user(user, req_version, + sizeof(struct ip_set_req_version)); + goto done; + } + case IP_SET_OP_GET_BYNAME: { + struct ip_set_req_get_set *req_get = data; + + if (*len != sizeof(struct ip_set_req_get_set)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(); + req_get->set.index = find_set_id(req_get->set.name); + nfnl_unlock(); + goto copy; + } + case IP_SET_OP_GET_BYINDEX: { + struct ip_set_req_get_set *req_get = data; + + if (*len != sizeof(struct ip_set_req_get_set) || + req_get->set.index >= ip_set_max) { + ret = -EINVAL; + goto done; + } + nfnl_lock(); + strncpy(req_get->set.name, + ip_set_list[req_get->set.index] + ? ip_set_list[req_get->set.index]->name : "", + IPSET_MAXNAMELEN); + nfnl_unlock(); + goto copy; + } + default: + ret = -EBADMSG; + goto done; + } /* end of switch(op) */ + +copy: + ret = copy_to_user(user, data, copylen); + +done: + vfree(data); + if (ret > 0) + ret = 0; + return ret; +} + +static struct nf_sockopt_ops so_set __read_mostly = { + .pf = PF_INET, + .get_optmin = SO_IP_SET, + .get_optmax = SO_IP_SET + 1, + .get = &ip_set_sockfn_get, + .owner = THIS_MODULE, +}; + +static int __init +ip_set_init(void) +{ + int ret; + + if (max_sets) + ip_set_max = max_sets; + if (ip_set_max >= IPSET_INVALID_ID) + ip_set_max = IPSET_INVALID_ID - 1; + + ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, + GFP_KERNEL); + if (!ip_set_list) { + pr_err("ip_set: Unable to create ip_set_list\n"); + return -ENOMEM; + } + + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { + pr_err("ip_set: cannot register with nfnetlink.\n"); + kfree(ip_set_list); + return ret; + } + ret = nf_register_sockopt(&so_set); + if (ret != 0) { + pr_err("SO_SET registry failed: %d\n", ret); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + kfree(ip_set_list); + return ret; + } + + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __exit +ip_set_fini(void) +{ + /* There can't be any existing set */ + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + kfree(ip_set_list); + pr_debug("these are the famous last words\n"); +} + +module_init(ip_set_init); +module_exit(ip_set_fini); diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c new file mode 100644 index 000000000000..76737bba28ee --- /dev/null +++ b/net/netfilter/ipset/ip_set_getport.c @@ -0,0 +1,136 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Get Layer-4 data from the packets */ + +#include +#include +#include +#include +#include +#include + +#include + +/* We must handle non-linear skbs */ +static bool +get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, + bool src, __be16 *port, u8 *proto) +{ + switch (protocol) { + case IPPROTO_TCP: { + struct tcphdr _tcph; + const struct tcphdr *th; + + th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); + if (th == NULL) + /* No choice either */ + return false; + + *port = src ? th->source : th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr _udph; + const struct udphdr *uh; + + uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); + if (uh == NULL) + /* No choice either */ + return false; + + *port = src ? uh->source : uh->dest; + break; + } + case IPPROTO_ICMP: { + struct icmphdr _ich; + const struct icmphdr *ic; + + ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); + if (ic == NULL) + return false; + + *port = (__force __be16)htons((ic->type << 8) | ic->code); + break; + } + case IPPROTO_ICMPV6: { + struct icmp6hdr _ich; + const struct icmp6hdr *ic; + + ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); + if (ic == NULL) + return false; + + *port = (__force __be16) + htons((ic->icmp6_type << 8) | ic->icmp6_code); + break; + } + default: + break; + } + *proto = protocol; + + return true; +} + +bool +ip_set_get_ip4_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned int protooff = ip_hdrlen(skb); + int protocol = iph->protocol; + + /* See comments at tcp_match in ip_tables.c */ + if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET)) + return false; + + return get_port(skb, protocol, protooff, src, port, proto); +} +EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); + +bool +ip_set_get_ip6_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto) +{ + unsigned int protooff = 0; + int protocol; + unsigned short fragoff; + + protocol = ipv6_find_hdr(skb, &protooff, -1, &fragoff); + if (protocol <= 0 || fragoff) + return false; + + return get_port(skb, protocol, protooff, src, port, proto); +} +EXPORT_SYMBOL_GPL(ip_set_get_ip6_port); + +bool +ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) +{ + bool ret; + u8 proto; + + switch (pf) { + case AF_INET: + ret = ip_set_get_ip4_port(skb, src, port, &proto); + case AF_INET6: + ret = ip_set_get_ip6_port(skb, src, port, &proto); + default: + return false; + } + if (!ret) + return ret; + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + return true; + default: + return false; + } +} +EXPORT_SYMBOL_GPL(ip_set_get_ip_port); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c new file mode 100644 index 000000000000..23f8c8162214 --- /dev/null +++ b/net/netfilter/ipset/pfxlen.c @@ -0,0 +1,291 @@ +#include + +/* + * Prefixlen maps for fast conversions, by Jan Engelhardt. + */ + +#define E(a, b, c, d) \ + {.ip6 = { \ + __constant_htonl(a), __constant_htonl(b), \ + __constant_htonl(c), __constant_htonl(d), \ + } } + +/* + * This table works for both IPv4 and IPv6; + * just use prefixlen_netmask_map[prefixlength].ip. + */ +const union nf_inet_addr ip_set_netmask_map[] = { + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), +}; +EXPORT_SYMBOL_GPL(ip_set_netmask_map); + +#undef E +#define E(a, b, c, d) \ + {.ip6 = { (__force __be32) a, (__force __be32) b, \ + (__force __be32) c, (__force __be32) d, \ + } } + +/* + * This table works for both IPv4 and IPv6; + * just use prefixlen_hostmask_map[prefixlength].ip. + */ +const union nf_inet_addr ip_set_hostmask_map[] = { + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), +}; +EXPORT_SYMBOL_GPL(ip_set_hostmask_map); -- cgit v1.2.3-59-g8ed1b From 72205fc68bd13109576aa6c4c12c740962d28a6c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:33:17 +0100 Subject: netfilter: ipset: bitmap:ip set type support The module implements the bitmap:ip set type in two flavours, without and with timeout support. In this kind of set one can store IPv4 addresses (or network addresses) from a given range. In order not to waste memory, the timeout version does not rely on the kernel timer for every element to be timed out but on garbage collection. All set types use this mechanism. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_bitmap.h | 31 ++ include/linux/netfilter/ipset/ip_set_timeout.h | 127 ++++++ net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 3 + net/netfilter/ipset/ip_set_bitmap_ip.c | 588 +++++++++++++++++++++++++ 5 files changed, 758 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set_bitmap.h create mode 100644 include/linux/netfilter/ipset/ip_set_timeout.h create mode 100644 net/netfilter/ipset/ip_set_bitmap_ip.c diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h new file mode 100644 index 000000000000..61a9e8746c83 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -0,0 +1,31 @@ +#ifndef __IP_SET_BITMAP_H +#define __IP_SET_BITMAP_H + +/* Bitmap type specific error codes */ +enum { + /* The element is out of the range of the set */ + IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC, + /* The range exceeds the size limit of the set type */ + IPSET_ERR_BITMAP_RANGE_SIZE, +}; + +#ifdef __KERNEL__ +#define IPSET_BITMAP_MAX_RANGE 0x0000FFFF + +/* Common functions */ + +static inline u32 +range_to_mask(u32 from, u32 to, u8 *bits) +{ + u32 mask = 0xFFFFFFFE; + + *bits = 32; + while (--(*bits) > 0 && mask && (to & mask) != from) + mask <<= 1; + + return mask; +} + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_BITMAP_H */ diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h new file mode 100644 index 000000000000..9f30c5f2ec1c --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -0,0 +1,127 @@ +#ifndef _IP_SET_TIMEOUT_H +#define _IP_SET_TIMEOUT_H + +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +/* How often should the gc be run by default */ +#define IPSET_GC_TIME (3 * 60) + +/* Timeout period depending on the timeout value of the given set */ +#define IPSET_GC_PERIOD(timeout) \ + ((timeout/3) ? min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1) + +/* Set is defined without timeout support: timeout value may be 0 */ +#define IPSET_NO_TIMEOUT UINT_MAX + +#define with_timeout(timeout) ((timeout) != IPSET_NO_TIMEOUT) + +static inline unsigned int +ip_set_timeout_uget(struct nlattr *tb) +{ + unsigned int timeout = ip_set_get_h32(tb); + + /* Userspace supplied TIMEOUT parameter: adjust crazy size */ + return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout; +} + +#ifdef IP_SET_BITMAP_TIMEOUT + +/* Bitmap specific timeout constants and macros for the entries */ + +/* Bitmap entry is unset */ +#define IPSET_ELEM_UNSET 0 +/* Bitmap entry is set with no timeout value */ +#define IPSET_ELEM_PERMANENT (UINT_MAX/2) + +static inline bool +ip_set_timeout_test(unsigned long timeout) +{ + return timeout != IPSET_ELEM_UNSET && + (timeout == IPSET_ELEM_PERMANENT || + time_after(timeout, jiffies)); +} + +static inline bool +ip_set_timeout_expired(unsigned long timeout) +{ + return timeout != IPSET_ELEM_UNSET && + timeout != IPSET_ELEM_PERMANENT && + time_before(timeout, jiffies); +} + +static inline unsigned long +ip_set_timeout_set(u32 timeout) +{ + unsigned long t; + + if (!timeout) + return IPSET_ELEM_PERMANENT; + + t = timeout * HZ + jiffies; + if (t == IPSET_ELEM_UNSET || t == IPSET_ELEM_PERMANENT) + /* Bingo! */ + t++; + + return t; +} + +static inline u32 +ip_set_timeout_get(unsigned long timeout) +{ + return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; +} + +#else + +/* Hash specific timeout constants and macros for the entries */ + +/* Hash entry is set with no timeout value */ +#define IPSET_ELEM_PERMANENT 0 + +static inline bool +ip_set_timeout_test(unsigned long timeout) +{ + return timeout == IPSET_ELEM_PERMANENT || + time_after(timeout, jiffies); +} + +static inline bool +ip_set_timeout_expired(unsigned long timeout) +{ + return timeout != IPSET_ELEM_PERMANENT && + time_before(timeout, jiffies); +} + +static inline unsigned long +ip_set_timeout_set(u32 timeout) +{ + unsigned long t; + + if (!timeout) + return IPSET_ELEM_PERMANENT; + + t = timeout * HZ + jiffies; + if (t == IPSET_ELEM_PERMANENT) + /* Bingo! :-) */ + t++; + + return t; +} + +static inline u32 +ip_set_timeout_get(unsigned long timeout) +{ + return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; +} +#endif /* ! IP_SET_BITMAP_TIMEOUT */ + +#endif /* __KERNEL__ */ + +#endif /* _IP_SET_TIMEOUT_H */ diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 5ade156dd470..b63a8eea183c 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -23,4 +23,13 @@ config IP_SET_MAX The value can be overriden by the 'max_sets' module parameter of the 'ip_set' module. +config IP_SET_BITMAP_IP + tristate "bitmap:ip set support" + depends on IP_SET + help + This option adds the bitmap:ip set type support, by which one + can store IPv4 addresses (or network addresse) from a range. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 910cd425c067..ea1c85e28af1 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -6,3 +6,6 @@ ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o # ipset core obj-$(CONFIG_IP_SET) += ip_set.o + +# bitmap types +obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c new file mode 100644 index 000000000000..047440085509 --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -0,0 +1,588 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the bitmap:ip type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#define IP_SET_BITMAP_TIMEOUT +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("bitmap:ip type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:ip"); + +/* Type structure */ +struct bitmap_ip { + void *members; /* the set members */ + u32 first_ip; /* host byte order, included in range */ + u32 last_ip; /* host byte order, included in range */ + u32 elements; /* number of max elements in the set */ + u32 hosts; /* number of hosts in a subnet */ + size_t memsize; /* members size */ + u8 netmask; /* subnet netmask */ + u32 timeout; /* timeout parameter */ + struct timer_list gc; /* garbage collection */ +}; + +/* Base variant */ + +static inline u32 +ip_to_id(const struct bitmap_ip *m, u32 ip) +{ + return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; +} + +static int +bitmap_ip_test(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_ip *map = set->data; + u16 id = *(u16 *)value; + + return !!test_bit(id, map->members); +} + +static int +bitmap_ip_add(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ip *map = set->data; + u16 id = *(u16 *)value; + + if (test_and_set_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_ip_del(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ip *map = set->data; + u16 id = *(u16 *)value; + + if (!test_and_clear_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_ip_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ip *map = set->data; + struct nlattr *atd, *nested; + u32 id, first = cb->args[2]; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] < map->elements; cb->args[2]++) { + id = cb->args[2]; + if (!test_bit(id, map->members)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts)); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +/* Timeout variant */ + +static int +bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_ip *map = set->data; + const unsigned long *members = map->members; + u16 id = *(u16 *)value; + + return ip_set_timeout_test(members[id]); +} + +static int +bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ip *map = set->data; + unsigned long *members = map->members; + u16 id = *(u16 *)value; + + if (ip_set_timeout_test(members[id])) + return -IPSET_ERR_EXIST; + + members[id] = ip_set_timeout_set(timeout); + + return 0; +} + +static int +bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ip *map = set->data; + unsigned long *members = map->members; + u16 id = *(u16 *)value; + int ret = -IPSET_ERR_EXIST; + + if (ip_set_timeout_test(members[id])) + ret = 0; + + members[id] = IPSET_ELEM_UNSET; + return ret; +} + +static int +bitmap_ip_tlist(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ip *map = set->data; + struct nlattr *adt, *nested; + u32 id, first = cb->args[2]; + const unsigned long *members = map->members; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EMSGSIZE; + for (; cb->args[2] < map->elements; cb->args[2]++) { + id = cb->args[2]; + if (!ip_set_timeout_test(members[id])) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts)); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(members[id]))); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct bitmap_ip *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 ip; + + ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); + if (ip < map->first_ip || ip > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + ip = ip_to_id(map, ip); + + return adtfn(set, &ip, map->timeout); +} + +static int +bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct bitmap_ip *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 timeout = map->timeout; + u32 ip, ip_to, id; + int ret = 0; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + if (ip < map->first_ip || ip > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(map->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST) { + id = ip_to_id(map, ip); + return adtfn(set, &id, timeout); + } + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) { + swap(ip, ip_to); + if (ip < map->first_ip) + return -IPSET_ERR_BITMAP_RANGE; + } + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + if (ip_to > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + for (; !before(ip_to, ip); ip += map->hosts) { + id = ip_to_id(map, ip); + ret = adtfn(set, &id, timeout);; + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static void +bitmap_ip_destroy(struct ip_set *set) +{ + struct bitmap_ip *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + + ip_set_free(map->members); + kfree(map); + + set->data = NULL; +} + +static void +bitmap_ip_flush(struct ip_set *set) +{ + struct bitmap_ip *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct bitmap_ip *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); + if (map->netmask != 32) + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static bool +bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct bitmap_ip *x = a->data; + const struct bitmap_ip *y = b->data; + + return x->first_ip == y->first_ip && + x->last_ip == y->last_ip && + x->netmask == y->netmask && + x->timeout == y->timeout; +} + +static const struct ip_set_type_variant bitmap_ip = { + .kadt = bitmap_ip_kadt, + .uadt = bitmap_ip_uadt, + .adt = { + [IPSET_ADD] = bitmap_ip_add, + [IPSET_DEL] = bitmap_ip_del, + [IPSET_TEST] = bitmap_ip_test, + }, + .destroy = bitmap_ip_destroy, + .flush = bitmap_ip_flush, + .head = bitmap_ip_head, + .list = bitmap_ip_list, + .same_set = bitmap_ip_same_set, +}; + +static const struct ip_set_type_variant bitmap_tip = { + .kadt = bitmap_ip_kadt, + .uadt = bitmap_ip_uadt, + .adt = { + [IPSET_ADD] = bitmap_ip_tadd, + [IPSET_DEL] = bitmap_ip_tdel, + [IPSET_TEST] = bitmap_ip_ttest, + }, + .destroy = bitmap_ip_destroy, + .flush = bitmap_ip_flush, + .head = bitmap_ip_head, + .list = bitmap_ip_tlist, + .same_set = bitmap_ip_same_set, +}; + +static void +bitmap_ip_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_ip *map = set->data; + unsigned long *table = map->members; + u32 id; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id < map->elements; id++) + if (ip_set_timeout_expired(table[id])) + table[id] = IPSET_ELEM_UNSET; + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +bitmap_ip_gc_init(struct ip_set *set) +{ + struct bitmap_ip *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_ip_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip type of sets */ + +static bool +init_map_ip(struct ip_set *set, struct bitmap_ip *map, + u32 first_ip, u32 last_ip, + u32 elements, u32 hosts, u8 netmask) +{ + map->members = ip_set_alloc(map->memsize); + if (!map->members) + return false; + map->first_ip = first_ip; + map->last_ip = last_ip; + map->elements = elements; + map->hosts = hosts; + map->netmask = netmask; + map->timeout = IPSET_NO_TIMEOUT; + + set->data = map; + set->family = AF_INET; + + return true; +} + +static int +bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct bitmap_ip *map; + u32 first_ip, last_ip, hosts, elements; + u8 netmask = 32; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); + if (ret) + return ret; + if (first_ip > last_ip) { + u32 tmp = first_ip; + + first_ip = last_ip; + last_ip = tmp; + } + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr >= 32) + return -IPSET_ERR_INVALID_CIDR; + last_ip = first_ip | ~ip_set_hostmask(cidr); + } else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if (netmask > 32) + return -IPSET_ERR_INVALID_NETMASK; + + first_ip &= ip_set_hostmask(netmask); + last_ip |= ~ip_set_hostmask(netmask); + } + + if (netmask == 32) { + hosts = 1; + elements = last_ip - first_ip + 1; + } else { + u8 mask_bits; + u32 mask; + + mask = range_to_mask(first_ip, last_ip, &mask_bits); + + if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) || + netmask <= mask_bits) + return -IPSET_ERR_BITMAP_RANGE; + + pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); + hosts = 2 << (32 - netmask - 1); + elements = 2 << (netmask - mask_bits - 1); + } + if (elements > IPSET_BITMAP_MAX_RANGE + 1) + return -IPSET_ERR_BITMAP_RANGE_SIZE; + + pr_debug("hosts %u, elements %u\n", hosts, elements); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + map->memsize = elements * sizeof(unsigned long); + + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->variant = &bitmap_tip; + + bitmap_ip_gc_init(set); + } else { + map->memsize = bitmap_bytes(0, elements - 1); + + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + + set->variant = &bitmap_ip; + } + return 0; +} + +static struct ip_set_type bitmap_ip_type __read_mostly = { + .name = "bitmap:ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, + .family = AF_INET, + .revision = 0, + .create = bitmap_ip_create, + .create_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +bitmap_ip_init(void) +{ + return ip_set_type_register(&bitmap_ip_type); +} + +static void __exit +bitmap_ip_fini(void) +{ + ip_set_type_unregister(&bitmap_ip_type); +} + +module_init(bitmap_ip_init); +module_exit(bitmap_ip_fini); -- cgit v1.2.3-59-g8ed1b From de76021a1bb35e3560afccf741d1119a872aea49 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:35:12 +0100 Subject: netfilter: ipset: bitmap:ip,mac type support The module implements the bitmap:ip,mac set type in two flavours, without and with timeout support. In this kind of set one can store IPv4 address and (source) MAC address pairs. The type supports elements added without the MAC part filled out: when the first matching from kernel happens, the MAC part is automatically filled out. The timing out of the elements stars when an element is complete in the IP,MAC pair. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_bitmap_ipmac.c | 655 ++++++++++++++++++++++++++++++ 3 files changed, 665 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_bitmap_ipmac.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index b63a8eea183c..f18654c73af2 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -32,4 +32,13 @@ config IP_SET_BITMAP_IP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_BITMAP_IPMAC + tristate "bitmap:ip,mac set support" + depends on IP_SET + help + This option adds the bitmap:ip,mac set type support, by which one + can store IPv4 address and (source) MAC address pairs from a range. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index ea1c85e28af1..f7a099f40d0b 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_IP_SET) += ip_set.o # bitmap types obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o +obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c new file mode 100644 index 000000000000..d826332d2937 --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -0,0 +1,655 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the bitmap:ip,mac type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:ip,mac"); + +enum { + MAC_EMPTY, /* element is not set */ + MAC_FILLED, /* element is set with MAC */ + MAC_UNSET, /* element is set, without MAC */ +}; + +/* Type structure */ +struct bitmap_ipmac { + void *members; /* the set members */ + u32 first_ip; /* host byte order, included in range */ + u32 last_ip; /* host byte order, included in range */ + u32 timeout; /* timeout value */ + struct timer_list gc; /* garbage collector */ + size_t dsize; /* size of element */ +}; + +/* ADT structure for generic function args */ +struct ipmac { + u32 id; /* id in array */ + unsigned char *ether; /* ethernet address */ +}; + +/* Member element without and with timeout */ + +struct ipmac_elem { + unsigned char ether[ETH_ALEN]; + unsigned char match; +} __attribute__ ((aligned)); + +struct ipmac_telem { + unsigned char ether[ETH_ALEN]; + unsigned char match; + unsigned long timeout; +} __attribute__ ((aligned)); + +static inline void * +bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id) +{ + return (void *)((char *)map->members + id * map->dsize); +} + +static inline bool +bitmap_timeout(const struct bitmap_ipmac *map, u32 id) +{ + const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); + + return ip_set_timeout_test(elem->timeout); +} + +static inline bool +bitmap_expired(const struct bitmap_ipmac *map, u32 id) +{ + const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); + + return ip_set_timeout_expired(elem->timeout); +} + +static inline int +bitmap_ipmac_exist(const struct ipmac_telem *elem) +{ + return elem->match == MAC_UNSET || + (elem->match == MAC_FILLED && + !ip_set_timeout_expired(elem->timeout)); +} + +/* Base variant */ + +static int +bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); + + switch (elem->match) { + case MAC_UNSET: + /* Trigger kernel to fill out the ethernet address */ + return -EAGAIN; + case MAC_FILLED: + return data->ether == NULL || + compare_ether_addr(data->ether, elem->ether) == 0; + } + return 0; +} + +static int +bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); + + switch (elem->match) { + case MAC_UNSET: + if (!data->ether) + /* Already added without ethernet address */ + return -IPSET_ERR_EXIST; + /* Fill the MAC address */ + memcpy(elem->ether, data->ether, ETH_ALEN); + elem->match = MAC_FILLED; + break; + case MAC_FILLED: + return -IPSET_ERR_EXIST; + case MAC_EMPTY: + if (data->ether) { + memcpy(elem->ether, data->ether, ETH_ALEN); + elem->match = MAC_FILLED; + } else + elem->match = MAC_UNSET; + } + + return 0; +} + +static int +bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); + + if (elem->match == MAC_EMPTY) + return -IPSET_ERR_EXIST; + + elem->match = MAC_EMPTY; + + return 0; +} + +static int +bitmap_ipmac_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac_elem *elem; + struct nlattr *atd, *nested; + u32 id, first = cb->args[2]; + u32 last = map->last_ip - map->first_ip; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + elem = bitmap_ipmac_elem(map, id); + if (elem->match == MAC_EMPTY) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)); + if (elem->match == MAC_FILLED) + NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +/* Timeout variant */ + +static int +bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); + + switch (elem->match) { + case MAC_UNSET: + /* Trigger kernel to fill out the ethernet address */ + return -EAGAIN; + case MAC_FILLED: + return (data->ether == NULL || + compare_ether_addr(data->ether, elem->ether) == 0) && + !bitmap_expired(map, data->id); + } + return 0; +} + +static int +bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); + + switch (elem->match) { + case MAC_UNSET: + if (!data->ether) + /* Already added without ethernet address */ + return -IPSET_ERR_EXIST; + /* Fill the MAC address and activate the timer */ + memcpy(elem->ether, data->ether, ETH_ALEN); + elem->match = MAC_FILLED; + if (timeout == map->timeout) + /* Timeout was not specified, get stored one */ + timeout = elem->timeout; + elem->timeout = ip_set_timeout_set(timeout); + break; + case MAC_FILLED: + if (!bitmap_expired(map, data->id)) + return -IPSET_ERR_EXIST; + /* Fall through */ + case MAC_EMPTY: + if (data->ether) { + memcpy(elem->ether, data->ether, ETH_ALEN); + elem->match = MAC_FILLED; + } else + elem->match = MAC_UNSET; + /* If MAC is unset yet, we store plain timeout value + * because the timer is not activated yet + * and we can reuse it later when MAC is filled out, + * possibly by the kernel */ + elem->timeout = data->ether ? ip_set_timeout_set(timeout) + : timeout; + break; + } + + return 0; +} + +static int +bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_ipmac *map = set->data; + const struct ipmac *data = value; + struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); + + if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id)) + return -IPSET_ERR_EXIST; + + elem->match = MAC_EMPTY; + + return 0; +} + +static int +bitmap_ipmac_tlist(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_ipmac *map = set->data; + const struct ipmac_telem *elem; + struct nlattr *atd, *nested; + u32 id, first = cb->args[2]; + u32 timeout, last = map->last_ip - map->first_ip; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + elem = bitmap_ipmac_elem(map, id); + if (!bitmap_ipmac_exist(elem)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)); + if (elem->match == MAC_FILLED) + NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether); + timeout = elem->match == MAC_UNSET ? elem->timeout + : ip_set_timeout_get(elem->timeout); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + return -EMSGSIZE; +} + +static int +bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct bitmap_ipmac *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct ipmac data; + + data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); + if (data.id < map->first_ip || data.id > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + /* Backward compatibility: we don't check the second flag */ + if (skb_mac_header(skb) < skb->head || + (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + data.id -= map->first_ip; + data.ether = eth_hdr(skb)->h_source; + + return adtfn(set, &data, map->timeout); +} + +static int +bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct bitmap_ipmac *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct ipmac data; + u32 timeout = map->timeout; + int ret = 0; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id); + if (ret) + return ret; + + if (data.id < map->first_ip || data.id > map->last_ip) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_ETHER]) + data.ether = nla_data(tb[IPSET_ATTR_ETHER]); + else + data.ether = NULL; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(map->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + data.id -= map->first_ip; + + ret = adtfn(set, &data, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +static void +bitmap_ipmac_destroy(struct ip_set *set) +{ + struct bitmap_ipmac *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + + ip_set_free(map->members); + kfree(map); + + set->data = NULL; +} + +static void +bitmap_ipmac_flush(struct ip_set *set) +{ + struct bitmap_ipmac *map = set->data; + + memset(map->members, 0, + (map->last_ip - map->first_ip + 1) * map->dsize); +} + +static int +bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct bitmap_ipmac *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + + (map->last_ip - map->first_ip + 1) * map->dsize)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static bool +bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct bitmap_ipmac *x = a->data; + const struct bitmap_ipmac *y = b->data; + + return x->first_ip == y->first_ip && + x->last_ip == y->last_ip && + x->timeout == y->timeout; +} + +static const struct ip_set_type_variant bitmap_ipmac = { + .kadt = bitmap_ipmac_kadt, + .uadt = bitmap_ipmac_uadt, + .adt = { + [IPSET_ADD] = bitmap_ipmac_add, + [IPSET_DEL] = bitmap_ipmac_del, + [IPSET_TEST] = bitmap_ipmac_test, + }, + .destroy = bitmap_ipmac_destroy, + .flush = bitmap_ipmac_flush, + .head = bitmap_ipmac_head, + .list = bitmap_ipmac_list, + .same_set = bitmap_ipmac_same_set, +}; + +static const struct ip_set_type_variant bitmap_tipmac = { + .kadt = bitmap_ipmac_kadt, + .uadt = bitmap_ipmac_uadt, + .adt = { + [IPSET_ADD] = bitmap_ipmac_tadd, + [IPSET_DEL] = bitmap_ipmac_tdel, + [IPSET_TEST] = bitmap_ipmac_ttest, + }, + .destroy = bitmap_ipmac_destroy, + .flush = bitmap_ipmac_flush, + .head = bitmap_ipmac_head, + .list = bitmap_ipmac_tlist, + .same_set = bitmap_ipmac_same_set, +}; + +static void +bitmap_ipmac_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_ipmac *map = set->data; + struct ipmac_telem *elem; + u32 id, last = map->last_ip - map->first_ip; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id <= last; id++) { + elem = bitmap_ipmac_elem(map, id); + if (elem->match == MAC_FILLED && + ip_set_timeout_expired(elem->timeout)) + elem->match = MAC_EMPTY; + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +bitmap_ipmac_gc_init(struct ip_set *set) +{ + struct bitmap_ipmac *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_ipmac_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip,mac type of sets */ + +static bool +init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, + u32 first_ip, u32 last_ip) +{ + map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + if (!map->members) + return false; + map->first_ip = first_ip; + map->last_ip = last_ip; + map->timeout = IPSET_NO_TIMEOUT; + + set->data = map; + set->family = AF_INET; + + return true; +} + +static int +bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], + u32 flags) +{ + u32 first_ip, last_ip, elements; + struct bitmap_ipmac *map; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); + if (ret) + return ret; + if (first_ip > last_ip) { + u32 tmp = first_ip; + + first_ip = last_ip; + last_ip = tmp; + } + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr >= 32) + return -IPSET_ERR_INVALID_CIDR; + last_ip = first_ip | ~ip_set_hostmask(cidr); + } else + return -IPSET_ERR_PROTOCOL; + + elements = last_ip - first_ip + 1; + + if (elements > IPSET_BITMAP_MAX_RANGE + 1) + return -IPSET_ERR_BITMAP_RANGE_SIZE; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct ipmac_telem); + + if (!init_map_ipmac(set, map, first_ip, last_ip)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = &bitmap_tipmac; + + bitmap_ipmac_gc_init(set); + } else { + map->dsize = sizeof(struct ipmac_elem); + + if (!init_map_ipmac(set, map, first_ip, last_ip)) { + kfree(map); + return -ENOMEM; + } + set->variant = &bitmap_ipmac; + + } + return 0; +} + +static struct ip_set_type bitmap_ipmac_type = { + .name = "bitmap:ip,mac", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, + .dimension = IPSET_DIM_TWO, + .family = AF_INET, + .revision = 0, + .create = bitmap_ipmac_create, + .create_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +bitmap_ipmac_init(void) +{ + return ip_set_type_register(&bitmap_ipmac_type); +} + +static void __exit +bitmap_ipmac_fini(void) +{ + ip_set_type_unregister(&bitmap_ipmac_type); +} + +module_init(bitmap_ipmac_init); +module_exit(bitmap_ipmac_fini); -- cgit v1.2.3-59-g8ed1b From 543261907dc3c4e90845acfcd602ebdbfdfcb4f0 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:37:04 +0100 Subject: netfilter: ipset; bitmap:port set type support The module implements the bitmap:port type in two flavours, without and with timeout support to store TCP/UDP ports from a range. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_bitmap_port.c | 520 +++++++++++++++++++++++++++++++ 3 files changed, 530 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_bitmap_port.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index f18654c73af2..f401e9112703 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -41,4 +41,13 @@ config IP_SET_BITMAP_IPMAC To compile it as a module, choose M here. If unsure, say N. +config IP_SET_BITMAP_PORT + tristate "bitmap:port set support" + depends on IP_SET + help + This option adds the bitmap:port set type support, by which one + can store TCP/UDP port numbers from a range. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index f7a099f40d0b..40866e267752 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_IP_SET) += ip_set.o # bitmap types obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o +obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c new file mode 100644 index 000000000000..92074bb64134 --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -0,0 +1,520 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the bitmap:port type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#define IP_SET_BITMAP_TIMEOUT +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("bitmap:port type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:port"); + +/* Type structure */ +struct bitmap_port { + void *members; /* the set members */ + u16 first_port; /* host byte order, included in range */ + u16 last_port; /* host byte order, included in range */ + size_t memsize; /* members size */ + u32 timeout; /* timeout parameter */ + struct timer_list gc; /* garbage collection */ +}; + +/* Base variant */ + +static int +bitmap_port_test(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_port *map = set->data; + u16 id = *(u16 *)value; + + return !!test_bit(id, map->members); +} + +static int +bitmap_port_add(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + u16 id = *(u16 *)value; + + if (test_and_set_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_port_del(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + u16 id = *(u16 *)value; + + if (!test_and_clear_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_port_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_port *map = set->data; + struct nlattr *atd, *nested; + u16 id, first = cb->args[2]; + u16 last = map->last_port - map->first_port; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + if (!test_bit(id, map->members)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +/* Timeout variant */ + +static int +bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_port *map = set->data; + const unsigned long *members = map->members; + u16 id = *(u16 *)value; + + return ip_set_timeout_test(members[id]); +} + +static int +bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + unsigned long *members = map->members; + u16 id = *(u16 *)value; + + if (ip_set_timeout_test(members[id])) + return -IPSET_ERR_EXIST; + + members[id] = ip_set_timeout_set(timeout); + + return 0; +} + +static int +bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + unsigned long *members = map->members; + u16 id = *(u16 *)value; + int ret = -IPSET_ERR_EXIST; + + if (ip_set_timeout_test(members[id])) + ret = 0; + + members[id] = IPSET_ELEM_UNSET; + return ret; +} + +static int +bitmap_port_tlist(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_port *map = set->data; + struct nlattr *adt, *nested; + u16 id, first = cb->args[2]; + u16 last = map->last_port - map->first_port; + const unsigned long *members = map->members; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EMSGSIZE; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + if (!ip_set_timeout_test(members[id])) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(members[id]))); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct bitmap_port *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + __be16 __port; + u16 port = 0; + + if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port)) + return -EINVAL; + + port = ntohs(__port); + + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + port -= map->first_port; + + return adtfn(set, &port, map->timeout); +} + +static int +bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct bitmap_port *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 timeout = map->timeout; + u32 port; /* wraparound */ + u16 id, port_to; + int ret = 0; + + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(map->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST) { + id = port - map->first_port; + return adtfn(set, &id, timeout); + } + + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) { + swap(port, port_to); + if (port < map->first_port) + return -IPSET_ERR_BITMAP_RANGE; + } + } else + port_to = port; + + if (port_to > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + for (; port <= port_to; port++) { + id = port - map->first_port; + ret = adtfn(set, &id, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static void +bitmap_port_destroy(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + + ip_set_free(map->members); + kfree(map); + + set->data = NULL; +} + +static void +bitmap_port_flush(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +bitmap_port_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct bitmap_port *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static bool +bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct bitmap_port *x = a->data; + const struct bitmap_port *y = b->data; + + return x->first_port == y->first_port && + x->last_port == y->last_port && + x->timeout == y->timeout; +} + +static const struct ip_set_type_variant bitmap_port = { + .kadt = bitmap_port_kadt, + .uadt = bitmap_port_uadt, + .adt = { + [IPSET_ADD] = bitmap_port_add, + [IPSET_DEL] = bitmap_port_del, + [IPSET_TEST] = bitmap_port_test, + }, + .destroy = bitmap_port_destroy, + .flush = bitmap_port_flush, + .head = bitmap_port_head, + .list = bitmap_port_list, + .same_set = bitmap_port_same_set, +}; + +static const struct ip_set_type_variant bitmap_tport = { + .kadt = bitmap_port_kadt, + .uadt = bitmap_port_uadt, + .adt = { + [IPSET_ADD] = bitmap_port_tadd, + [IPSET_DEL] = bitmap_port_tdel, + [IPSET_TEST] = bitmap_port_ttest, + }, + .destroy = bitmap_port_destroy, + .flush = bitmap_port_flush, + .head = bitmap_port_head, + .list = bitmap_port_tlist, + .same_set = bitmap_port_same_set, +}; + +static void +bitmap_port_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_port *map = set->data; + unsigned long *table = map->members; + u32 id; /* wraparound */ + u16 last = map->last_port - map->first_port; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id <= last; id++) + if (ip_set_timeout_expired(table[id])) + table[id] = IPSET_ELEM_UNSET; + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +bitmap_port_gc_init(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_port_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip type of sets */ + +static bool +init_map_port(struct ip_set *set, struct bitmap_port *map, + u16 first_port, u16 last_port) +{ + map->members = ip_set_alloc(map->memsize); + if (!map->members) + return false; + map->first_port = first_port; + map->last_port = last_port; + map->timeout = IPSET_NO_TIMEOUT; + + set->data = map; + set->family = AF_UNSPEC; + + return true; +} + +static int +bitmap_port_create(struct ip_set *set, struct nlattr *tb[], + u32 flags) +{ + struct bitmap_port *map; + u16 first_port, last_port; + + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); + last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (first_port > last_port) { + u16 tmp = first_port; + + first_port = last_port; + last_port = tmp; + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + map->memsize = (last_port - first_port + 1) + * sizeof(unsigned long); + + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->variant = &bitmap_tport; + + bitmap_port_gc_init(set); + } else { + map->memsize = bitmap_bytes(0, last_port - first_port); + pr_debug("memsize: %zu\n", map->memsize); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + + set->variant = &bitmap_port; + } + return 0; +} + +static struct ip_set_type bitmap_port_type = { + .name = "bitmap:port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_PORT, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = bitmap_port_create, + .create_policy = { + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +bitmap_port_init(void) +{ + return ip_set_type_register(&bitmap_port_type); +} + +static void __exit +bitmap_port_fini(void) +{ + ip_set_type_unregister(&bitmap_port_type); +} + +module_init(bitmap_port_init); +module_exit(bitmap_port_fini); -- cgit v1.2.3-59-g8ed1b From 6c027889696a7a694b0e2f6e3cabadefec7553b6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:38:36 +0100 Subject: netfilter: ipset: hash:ip set type support The module implements the hash:ip type support in four flavours: for IPv4 or IPv6, both without and with timeout support. All the hash types are based on the "array hash" or ahash structure and functions as a good compromise between minimal memory footprint and speed. The hashing uses arrays to resolve clashes. The hash table is resized (doubled) when searching becomes too long. Resizing can be triggered by userspace add commands only and those are serialized by the nfnl mutex. During resizing the set is read-locked, so the only possible concurrent operations are the kernel side readers. Those are protected by RCU locking. Because of the four flavours and the other hash types, the functions are implemented in general forms in the ip_set_ahash.h header file and the real functions are generated before compiling by macro expansion. Thus the dereferencing of low-level functions and void pointer arguments could be avoided: the low-level functions are inlined, the function arguments are pointers of type-specific structures. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_ahash.h | 1074 ++++++++++++++++++++++++++ include/linux/netfilter/ipset/ip_set_hash.h | 26 + net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 3 + net/netfilter/ipset/ip_set_hash_ip.c | 467 +++++++++++ 5 files changed, 1580 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set_ahash.h create mode 100644 include/linux/netfilter/ipset/ip_set_hash.h create mode 100644 net/netfilter/ipset/ip_set_hash_ip.c diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h new file mode 100644 index 000000000000..ec9d9bea1e37 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -0,0 +1,1074 @@ +#ifndef _IP_SET_AHASH_H +#define _IP_SET_AHASH_H + +#include +#include +#include + +/* Hashing which uses arrays to resolve clashing. The hash table is resized + * (doubled) when searching becomes too long. + * Internally jhash is used with the assumption that the size of the + * stored data is a multiple of sizeof(u32). If storage supports timeout, + * the timeout field must be the last one in the data structure - that field + * is ignored when computing the hash key. + * + * Readers and resizing + * + * Resizing can be triggered by userspace command only, and those + * are serialized by the nfnl mutex. During resizing the set is + * read-locked, so the only possible concurrent operations are + * the kernel side readers. Those must be protected by proper RCU locking. + */ + +/* Number of elements to store in an initial array block */ +#define AHASH_INIT_SIZE 4 +/* Max number of elements to store in an array block */ +#define AHASH_MAX_SIZE (3*4) + +/* A hash bucket */ +struct hbucket { + void *value; /* the array of the values */ + u8 size; /* size of the array */ + u8 pos; /* position of the first free entry */ +}; + +/* The hash table: the table size stored here in order to make resizing easy */ +struct htable { + u8 htable_bits; /* size of hash table == 2^htable_bits */ + struct hbucket bucket[0]; /* hashtable buckets */ +}; + +#define hbucket(h, i) &((h)->bucket[i]) + +/* Book-keeping of the prefixes added to the set */ +struct ip_set_hash_nets { + u8 cidr; /* the different cidr values in the set */ + u32 nets; /* number of elements per cidr */ +}; + +/* The generic ip_set hash structure */ +struct ip_set_hash { + struct htable *table; /* the hash table */ + u32 maxelem; /* max elements in the hash */ + u32 elements; /* current element (vs timeout) */ + u32 initval; /* random jhash init value */ + u32 timeout; /* timeout value, if enabled */ + struct timer_list gc; /* garbage collection when timeout enabled */ +#ifdef IP_SET_HASH_WITH_NETMASK + u8 netmask; /* netmask value for subnets to store */ +#endif +#ifdef IP_SET_HASH_WITH_NETS + struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */ +#endif +}; + +/* Compute htable_bits from the user input parameter hashsize */ +static u8 +htable_bits(u32 hashsize) +{ + /* Assume that hashsize == 2^htable_bits */ + u8 bits = fls(hashsize - 1); + if (jhash_size(bits) != hashsize) + /* Round up to the first 2^n value */ + bits = fls(hashsize); + + return bits; +} + +#ifdef IP_SET_HASH_WITH_NETS + +#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) + +/* Network cidr size book keeping when the hash stores different + * sized networks */ +static void +add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask) +{ + u8 i; + + ++h->nets[cidr-1].nets; + + pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets); + + if (h->nets[cidr-1].nets > 1) + return; + + /* New cidr size */ + for (i = 0; i < host_mask && h->nets[i].cidr; i++) { + /* Add in increasing prefix order, so larger cidr first */ + if (h->nets[i].cidr < cidr) + swap(h->nets[i].cidr, cidr); + } + if (i < host_mask) + h->nets[i].cidr = cidr; +} + +static void +del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask) +{ + u8 i; + + --h->nets[cidr-1].nets; + + pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets); + + if (h->nets[cidr-1].nets != 0) + return; + + /* All entries with this cidr size deleted, so cleanup h->cidr[] */ + for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) { + if (h->nets[i].cidr == cidr) + h->nets[i].cidr = cidr = h->nets[i+1].cidr; + } + h->nets[i - 1].cidr = 0; +} +#endif + +/* Destroy the hashtable part of the set */ +static void +ahash_destroy(struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) + /* FIXME: use slab cache */ + kfree(n->value); + } + + ip_set_free(t); +} + +/* Calculate the actual memory size of the set data */ +static size_t +ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask) +{ + u32 i; + struct htable *t = h->table; + size_t memsize = sizeof(*h) + + sizeof(*t) +#ifdef IP_SET_HASH_WITH_NETS + + sizeof(struct ip_set_hash_nets) * host_mask +#endif + + jhash_size(t->htable_bits) * sizeof(struct hbucket); + + for (i = 0; i < jhash_size(t->htable_bits); i++) + memsize += t->bucket[i].size * dsize; + + return memsize; +} + +/* Flush a hash type of set: destroy all elements */ +static void +ip_set_hash_flush(struct ip_set *set) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + n->size = n->pos = 0; + /* FIXME: use slab cache */ + kfree(n->value); + } + } +#ifdef IP_SET_HASH_WITH_NETS + memset(h->nets, 0, sizeof(struct ip_set_hash_nets) + * SET_HOST_MASK(set->family)); +#endif + h->elements = 0; +} + +/* Destroy a hash type of set */ +static void +ip_set_hash_destroy(struct ip_set *set) +{ + struct ip_set_hash *h = set->data; + + if (with_timeout(h->timeout)) + del_timer_sync(&h->gc); + + ahash_destroy(h->table); + kfree(h); + + set->data = NULL; +} + +#define HKEY(data, initval, htable_bits) \ +(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \ + & jhash_mask(htable_bits)) + +#endif /* _IP_SET_AHASH_H */ + +#define CONCAT(a, b, c) a##b##c +#define TOKEN(a, b, c) CONCAT(a, b, c) + +/* Type/family dependent function prototypes */ + +#define type_pf_data_equal TOKEN(TYPE, PF, _data_equal) +#define type_pf_data_isnull TOKEN(TYPE, PF, _data_isnull) +#define type_pf_data_copy TOKEN(TYPE, PF, _data_copy) +#define type_pf_data_zero_out TOKEN(TYPE, PF, _data_zero_out) +#define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask) +#define type_pf_data_list TOKEN(TYPE, PF, _data_list) +#define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist) + +#define type_pf_elem TOKEN(TYPE, PF, _elem) +#define type_pf_telem TOKEN(TYPE, PF, _telem) +#define type_pf_data_timeout TOKEN(TYPE, PF, _data_timeout) +#define type_pf_data_expired TOKEN(TYPE, PF, _data_expired) +#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set) + +#define type_pf_elem_add TOKEN(TYPE, PF, _elem_add) +#define type_pf_add TOKEN(TYPE, PF, _add) +#define type_pf_del TOKEN(TYPE, PF, _del) +#define type_pf_test_cidrs TOKEN(TYPE, PF, _test_cidrs) +#define type_pf_test TOKEN(TYPE, PF, _test) + +#define type_pf_elem_tadd TOKEN(TYPE, PF, _elem_tadd) +#define type_pf_del_telem TOKEN(TYPE, PF, _ahash_del_telem) +#define type_pf_expire TOKEN(TYPE, PF, _expire) +#define type_pf_tadd TOKEN(TYPE, PF, _tadd) +#define type_pf_tdel TOKEN(TYPE, PF, _tdel) +#define type_pf_ttest_cidrs TOKEN(TYPE, PF, _ahash_ttest_cidrs) +#define type_pf_ttest TOKEN(TYPE, PF, _ahash_ttest) + +#define type_pf_resize TOKEN(TYPE, PF, _resize) +#define type_pf_tresize TOKEN(TYPE, PF, _tresize) +#define type_pf_flush ip_set_hash_flush +#define type_pf_destroy ip_set_hash_destroy +#define type_pf_head TOKEN(TYPE, PF, _head) +#define type_pf_list TOKEN(TYPE, PF, _list) +#define type_pf_tlist TOKEN(TYPE, PF, _tlist) +#define type_pf_same_set TOKEN(TYPE, PF, _same_set) +#define type_pf_kadt TOKEN(TYPE, PF, _kadt) +#define type_pf_uadt TOKEN(TYPE, PF, _uadt) +#define type_pf_gc TOKEN(TYPE, PF, _gc) +#define type_pf_gc_init TOKEN(TYPE, PF, _gc_init) +#define type_pf_variant TOKEN(TYPE, PF, _variant) +#define type_pf_tvariant TOKEN(TYPE, PF, _tvariant) + +/* Flavour without timeout */ + +/* Get the ith element from the array block n */ +#define ahash_data(n, i) \ + ((struct type_pf_elem *)((n)->value) + (i)) + +/* Add an element to the hash table when resizing the set: + * we spare the maintenance of the internal counters. */ +static int +type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value) +{ + if (n->pos >= n->size) { + void *tmp; + + if (n->size >= AHASH_MAX_SIZE) + /* Trigger rehashing */ + return -EAGAIN; + + tmp = kzalloc((n->size + AHASH_INIT_SIZE) + * sizeof(struct type_pf_elem), + GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + if (n->size) { + memcpy(tmp, n->value, + sizeof(struct type_pf_elem) * n->size); + kfree(n->value); + } + n->value = tmp; + n->size += AHASH_INIT_SIZE; + } + type_pf_data_copy(ahash_data(n, n->pos++), value); + return 0; +} + +/* Resize a hash: create a new hash table with doubling the hashsize + * and inserting the elements to it. Repeat until we succeed or + * fail due to memory pressures. */ +static int +type_pf_resize(struct ip_set *set, bool retried) +{ + struct ip_set_hash *h = set->data; + struct htable *t, *orig = h->table; + u8 htable_bits = orig->htable_bits; + const struct type_pf_elem *data; + struct hbucket *n, *m; + u32 i, j; + int ret; + +retry: + ret = 0; + htable_bits++; + pr_debug("attempt to resize set %s from %u to %u, t %p\n", + set->name, orig->htable_bits, htable_bits, orig); + if (!htable_bits) + /* In case we have plenty of memory :-) */ + return -IPSET_ERR_HASH_FULL; + t = ip_set_alloc(sizeof(*t) + + jhash_size(htable_bits) * sizeof(struct hbucket)); + if (!t) + return -ENOMEM; + t->htable_bits = htable_bits; + + read_lock_bh(&set->lock); + for (i = 0; i < jhash_size(orig->htable_bits); i++) { + n = hbucket(orig, i); + for (j = 0; j < n->pos; j++) { + data = ahash_data(n, j); + m = hbucket(t, HKEY(data, h->initval, htable_bits)); + ret = type_pf_elem_add(m, data); + if (ret < 0) { + read_unlock_bh(&set->lock); + ahash_destroy(t); + if (ret == -EAGAIN) + goto retry; + return ret; + } + } + } + + rcu_assign_pointer(h->table, t); + read_unlock_bh(&set->lock); + + /* Give time to other readers of the set */ + synchronize_rcu_bh(); + + pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, + orig->htable_bits, orig, t->htable_bits, t); + ahash_destroy(orig); + + return 0; +} + +/* Add an element to a hash and update the internal counters when succeeded, + * otherwise report the proper error code. */ +static int +type_pf_add(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t; + const struct type_pf_elem *d = value; + struct hbucket *n; + int i, ret = 0; + u32 key; + + if (h->elements >= h->maxelem) + return -IPSET_ERR_HASH_FULL; + + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) + if (type_pf_data_equal(ahash_data(n, i), d)) { + ret = -IPSET_ERR_EXIST; + goto out; + } + + ret = type_pf_elem_add(n, value); + if (ret != 0) + goto out; + +#ifdef IP_SET_HASH_WITH_NETS + add_cidr(h, d->cidr, HOST_MASK); +#endif + h->elements++; +out: + rcu_read_unlock_bh(); + return ret; +} + +/* Delete an element from the hash: swap it with the last element + * and free up space if possible. + */ +static int +type_pf_del(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + const struct type_pf_elem *d = value; + struct hbucket *n; + int i; + struct type_pf_elem *data; + u32 key; + + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + if (!type_pf_data_equal(data, d)) + continue; + if (i != n->pos - 1) + /* Not last one */ + type_pf_data_copy(data, ahash_data(n, n->pos - 1)); + + n->pos--; + h->elements--; +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, d->cidr, HOST_MASK); +#endif + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * sizeof(struct type_pf_elem), + GFP_ATOMIC); + if (!tmp) + return 0; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, + n->size * sizeof(struct type_pf_elem)); + kfree(n->value); + n->value = tmp; + } + return 0; + } + + return -IPSET_ERR_EXIST; +} + +#ifdef IP_SET_HASH_WITH_NETS + +/* Special test function which takes into account the different network + * sizes added to the set */ +static int +type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct hbucket *n; + const struct type_pf_elem *data; + int i, j = 0; + u32 key; + u8 host_mask = SET_HOST_MASK(set->family); + + pr_debug("test by nets\n"); + for (; j < host_mask && h->nets[j].cidr; j++) { + type_pf_data_netmask(d, h->nets[j].cidr); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + if (type_pf_data_equal(data, d)) + return 1; + } + } + return 0; +} +#endif + +/* Test whether the element is added to the set */ +static int +type_pf_test(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct type_pf_elem *d = value; + struct hbucket *n; + const struct type_pf_elem *data; + int i; + u32 key; + +#ifdef IP_SET_HASH_WITH_NETS + /* If we test an IP address and not a network address, + * try all possible network sizes */ + if (d->cidr == SET_HOST_MASK(set->family)) + return type_pf_test_cidrs(set, d, timeout); +#endif + + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + if (type_pf_data_equal(data, d)) + return 1; + } + return 0; +} + +/* Reply a HEADER request: fill out the header part of the set */ +static int +type_pf_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct ip_set_hash *h = set->data; + struct nlattr *nested; + size_t memsize; + + read_lock_bh(&set->lock); + memsize = ahash_memsize(h, with_timeout(h->timeout) + ? sizeof(struct type_pf_telem) + : sizeof(struct type_pf_elem), + set->family == AF_INET ? 32 : 128); + read_unlock_bh(&set->lock); + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE, + htonl(jhash_size(h->table->htable_bits))); + NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)); +#ifdef IP_SET_HASH_WITH_NETMASK + if (h->netmask != HOST_MASK) + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask); +#endif + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)); + if (with_timeout(h->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +/* Reply a LIST/SAVE request: dump the elements of the specified set */ +static int +type_pf_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct ip_set_hash *h = set->data; + const struct htable *t = h->table; + struct nlattr *atd, *nested; + const struct hbucket *n; + const struct type_pf_elem *data; + u32 first = cb->args[2]; + /* We assume that one hash bucket fills into one page */ + void *incomplete; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + pr_debug("list hash set %s\n", set->name); + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + incomplete = skb_tail_pointer(skb); + n = hbucket(t, cb->args[2]); + pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + for (i = 0; i < n->pos; i++) { + data = ahash_data(n, i); + pr_debug("list hash %lu hbucket %p i %u, data %p\n", + cb->args[2], n, i, data); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (type_pf_data_list(skb, data)) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nlmsg_trim(skb, incomplete); + ipset_nest_end(skb, atd); + if (unlikely(first == cb->args[2])) { + pr_warning("Can't list set %s: one bucket does not fit into " + "a message. Please report it!\n", set->name); + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +type_pf_kadt(struct ip_set *set, const struct sk_buff * skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags); +static int +type_pf_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags); + +static const struct ip_set_type_variant type_pf_variant = { + .kadt = type_pf_kadt, + .uadt = type_pf_uadt, + .adt = { + [IPSET_ADD] = type_pf_add, + [IPSET_DEL] = type_pf_del, + [IPSET_TEST] = type_pf_test, + }, + .destroy = type_pf_destroy, + .flush = type_pf_flush, + .head = type_pf_head, + .list = type_pf_list, + .resize = type_pf_resize, + .same_set = type_pf_same_set, +}; + +/* Flavour with timeout support */ + +#define ahash_tdata(n, i) \ + (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i)) + +static inline u32 +type_pf_data_timeout(const struct type_pf_elem *data) +{ + const struct type_pf_telem *tdata = + (const struct type_pf_telem *) data; + + return tdata->timeout; +} + +static inline bool +type_pf_data_expired(const struct type_pf_elem *data) +{ + const struct type_pf_telem *tdata = + (const struct type_pf_telem *) data; + + return ip_set_timeout_expired(tdata->timeout); +} + +static inline void +type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout) +{ + struct type_pf_telem *tdata = (struct type_pf_telem *) data; + + tdata->timeout = ip_set_timeout_set(timeout); +} + +static int +type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value, + u32 timeout) +{ + struct type_pf_elem *data; + + if (n->pos >= n->size) { + void *tmp; + + if (n->size >= AHASH_MAX_SIZE) + /* Trigger rehashing */ + return -EAGAIN; + + tmp = kzalloc((n->size + AHASH_INIT_SIZE) + * sizeof(struct type_pf_telem), + GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + if (n->size) { + memcpy(tmp, n->value, + sizeof(struct type_pf_telem) * n->size); + kfree(n->value); + } + n->value = tmp; + n->size += AHASH_INIT_SIZE; + } + data = ahash_tdata(n, n->pos++); + type_pf_data_copy(data, value); + type_pf_data_timeout_set(data, timeout); + return 0; +} + +/* Delete expired elements from the hashtable */ +static void +type_pf_expire(struct ip_set_hash *h) +{ + struct htable *t = h->table; + struct hbucket *n; + struct type_pf_elem *data; + u32 i; + int j; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + for (j = 0; j < n->pos; j++) { + data = ahash_tdata(n, j); + if (type_pf_data_expired(data)) { + pr_debug("expired %u/%u\n", i, j); +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, data->cidr, HOST_MASK); +#endif + if (j != n->pos - 1) + /* Not last one */ + type_pf_data_copy(data, + ahash_tdata(n, n->pos - 1)); + n->pos--; + h->elements--; + } + } + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * sizeof(struct type_pf_telem), + GFP_ATOMIC); + if (!tmp) + /* Still try to delete expired elements */ + continue; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, + n->size * sizeof(struct type_pf_telem)); + kfree(n->value); + n->value = tmp; + } + } +} + +static int +type_pf_tresize(struct ip_set *set, bool retried) +{ + struct ip_set_hash *h = set->data; + struct htable *t, *orig = h->table; + u8 htable_bits = orig->htable_bits; + const struct type_pf_elem *data; + struct hbucket *n, *m; + u32 i, j; + int ret; + + /* Try to cleanup once */ + if (!retried) { + i = h->elements; + write_lock_bh(&set->lock); + type_pf_expire(set->data); + write_unlock_bh(&set->lock); + if (h->elements < i) + return 0; + } + +retry: + ret = 0; + htable_bits++; + if (!htable_bits) + /* In case we have plenty of memory :-) */ + return -IPSET_ERR_HASH_FULL; + t = ip_set_alloc(sizeof(*t) + + jhash_size(htable_bits) * sizeof(struct hbucket)); + if (!t) + return -ENOMEM; + t->htable_bits = htable_bits; + + read_lock_bh(&set->lock); + for (i = 0; i < jhash_size(orig->htable_bits); i++) { + n = hbucket(orig, i); + for (j = 0; j < n->pos; j++) { + data = ahash_tdata(n, j); + m = hbucket(t, HKEY(data, h->initval, htable_bits)); + ret = type_pf_elem_tadd(m, data, + type_pf_data_timeout(data)); + if (ret < 0) { + read_unlock_bh(&set->lock); + ahash_destroy(t); + if (ret == -EAGAIN) + goto retry; + return ret; + } + } + } + + rcu_assign_pointer(h->table, t); + read_unlock_bh(&set->lock); + + /* Give time to other readers of the set */ + synchronize_rcu_bh(); + + ahash_destroy(orig); + + return 0; +} + +static int +type_pf_tadd(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + const struct type_pf_elem *d = value; + struct hbucket *n; + struct type_pf_elem *data; + int ret = 0, i, j = AHASH_MAX_SIZE + 1; + u32 key; + + if (h->elements >= h->maxelem) + /* FIXME: when set is full, we slow down here */ + type_pf_expire(h); + if (h->elements >= h->maxelem) + return -IPSET_ERR_HASH_FULL; + + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (type_pf_data_equal(data, d)) { + if (type_pf_data_expired(data)) + j = i; + else { + ret = -IPSET_ERR_EXIST; + goto out; + } + } else if (j == AHASH_MAX_SIZE + 1 && + type_pf_data_expired(data)) + j = i; + } + if (j != AHASH_MAX_SIZE + 1) { + data = ahash_tdata(n, j); +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, data->cidr, HOST_MASK); + add_cidr(h, d->cidr, HOST_MASK); +#endif + type_pf_data_copy(data, d); + type_pf_data_timeout_set(data, timeout); + goto out; + } + ret = type_pf_elem_tadd(n, d, timeout); + if (ret != 0) + goto out; + +#ifdef IP_SET_HASH_WITH_NETS + add_cidr(h, d->cidr, HOST_MASK); +#endif + h->elements++; +out: + rcu_read_unlock_bh(); + return ret; +} + +static int +type_pf_tdel(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + const struct type_pf_elem *d = value; + struct hbucket *n; + int i, ret = 0; + struct type_pf_elem *data; + u32 key; + + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (!type_pf_data_equal(data, d)) + continue; + if (type_pf_data_expired(data)) + ret = -IPSET_ERR_EXIST; + if (i != n->pos - 1) + /* Not last one */ + type_pf_data_copy(data, ahash_tdata(n, n->pos - 1)); + + n->pos--; + h->elements--; +#ifdef IP_SET_HASH_WITH_NETS + del_cidr(h, d->cidr, HOST_MASK); +#endif + if (n->pos + AHASH_INIT_SIZE < n->size) { + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) + * sizeof(struct type_pf_telem), + GFP_ATOMIC); + if (!tmp) + return 0; + n->size -= AHASH_INIT_SIZE; + memcpy(tmp, n->value, + n->size * sizeof(struct type_pf_telem)); + kfree(n->value); + n->value = tmp; + } + return 0; + } + + return -IPSET_ERR_EXIST; +} + +#ifdef IP_SET_HASH_WITH_NETS +static int +type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct type_pf_elem *data; + struct hbucket *n; + int i, j = 0; + u32 key; + u8 host_mask = SET_HOST_MASK(set->family); + + for (; j < host_mask && h->nets[j].cidr; j++) { + type_pf_data_netmask(d, h->nets[j].cidr); + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (type_pf_data_equal(data, d)) + return !type_pf_data_expired(data); + } + } + return 0; +} +#endif + +static int +type_pf_ttest(struct ip_set *set, void *value, u32 timeout) +{ + struct ip_set_hash *h = set->data; + struct htable *t = h->table; + struct type_pf_elem *data, *d = value; + struct hbucket *n; + int i; + u32 key; + +#ifdef IP_SET_HASH_WITH_NETS + if (d->cidr == SET_HOST_MASK(set->family)) + return type_pf_ttest_cidrs(set, d, timeout); +#endif + key = HKEY(d, h->initval, t->htable_bits); + n = hbucket(t, key); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + if (type_pf_data_equal(data, d)) + return !type_pf_data_expired(data); + } + return 0; +} + +static int +type_pf_tlist(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct ip_set_hash *h = set->data; + const struct htable *t = h->table; + struct nlattr *atd, *nested; + const struct hbucket *n; + const struct type_pf_elem *data; + u32 first = cb->args[2]; + /* We assume that one hash bucket fills into one page */ + void *incomplete; + int i; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + incomplete = skb_tail_pointer(skb); + n = hbucket(t, cb->args[2]); + for (i = 0; i < n->pos; i++) { + data = ahash_tdata(n, i); + pr_debug("list %p %u\n", n, i); + if (type_pf_data_expired(data)) + continue; + pr_debug("do list %p %u\n", n, i); + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (cb->args[2] == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + if (type_pf_data_tlist(skb, data)) + goto nla_put_failure; + ipset_nest_end(skb, nested); + } + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nlmsg_trim(skb, incomplete); + ipset_nest_end(skb, atd); + if (unlikely(first == cb->args[2])) { + pr_warning("Can't list set %s: one bucket does not fit into " + "a message. Please report it!\n", set->name); + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static const struct ip_set_type_variant type_pf_tvariant = { + .kadt = type_pf_kadt, + .uadt = type_pf_uadt, + .adt = { + [IPSET_ADD] = type_pf_tadd, + [IPSET_DEL] = type_pf_tdel, + [IPSET_TEST] = type_pf_ttest, + }, + .destroy = type_pf_destroy, + .flush = type_pf_flush, + .head = type_pf_head, + .list = type_pf_tlist, + .resize = type_pf_tresize, + .same_set = type_pf_same_set, +}; + +static void +type_pf_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set_hash *h = set->data; + + pr_debug("called\n"); + write_lock_bh(&set->lock); + type_pf_expire(h); + write_unlock_bh(&set->lock); + + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); +} + +static void +type_pf_gc_init(struct ip_set *set) +{ + struct ip_set_hash *h = set->data; + + init_timer(&h->gc); + h->gc.data = (unsigned long) set; + h->gc.function = type_pf_gc; + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + add_timer(&h->gc); + pr_debug("gc initialized, run in every %u\n", + IPSET_GC_PERIOD(h->timeout)); +} + +#undef type_pf_data_equal +#undef type_pf_data_isnull +#undef type_pf_data_copy +#undef type_pf_data_zero_out +#undef type_pf_data_list +#undef type_pf_data_tlist + +#undef type_pf_elem +#undef type_pf_telem +#undef type_pf_data_timeout +#undef type_pf_data_expired +#undef type_pf_data_netmask +#undef type_pf_data_timeout_set + +#undef type_pf_elem_add +#undef type_pf_add +#undef type_pf_del +#undef type_pf_test_cidrs +#undef type_pf_test + +#undef type_pf_elem_tadd +#undef type_pf_expire +#undef type_pf_tadd +#undef type_pf_tdel +#undef type_pf_ttest_cidrs +#undef type_pf_ttest + +#undef type_pf_resize +#undef type_pf_tresize +#undef type_pf_flush +#undef type_pf_destroy +#undef type_pf_head +#undef type_pf_list +#undef type_pf_tlist +#undef type_pf_same_set +#undef type_pf_kadt +#undef type_pf_uadt +#undef type_pf_gc +#undef type_pf_gc_init +#undef type_pf_variant +#undef type_pf_tvariant diff --git a/include/linux/netfilter/ipset/ip_set_hash.h b/include/linux/netfilter/ipset/ip_set_hash.h new file mode 100644 index 000000000000..b86f15c04524 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_hash.h @@ -0,0 +1,26 @@ +#ifndef __IP_SET_HASH_H +#define __IP_SET_HASH_H + +/* Hash type specific error codes */ +enum { + /* Hash is full */ + IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC, + /* Null-valued element */ + IPSET_ERR_HASH_ELEM, + /* Invalid protocol */ + IPSET_ERR_INVALID_PROTO, + /* Protocol missing but must be specified */ + IPSET_ERR_MISSING_PROTO, +}; + +#ifdef __KERNEL__ + +#define IPSET_DEFAULT_HASHSIZE 1024 +#define IPSET_MIMINAL_HASHSIZE 64 +#define IPSET_DEFAULT_MAXELEM 65536 +#define IPSET_DEFAULT_PROBES 4 +#define IPSET_DEFAULT_RESIZE 100 + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_HASH_H */ diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index f401e9112703..194d89caeb3d 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -50,4 +50,14 @@ config IP_SET_BITMAP_PORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IP + tristate "hash:ip set support" + depends on IP_SET + help + This option adds the hash:ip set type support, by which one + can store arbitrary IPv4 or IPv6 addresses (or network addresses) + in a set. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 40866e267752..5cbf00c41a98 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -11,3 +11,6 @@ obj-$(CONFIG_IP_SET) += ip_set.o obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o + +# hash types +obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c new file mode 100644 index 000000000000..53964bc831aa --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -0,0 +1,467 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip"); + +/* Type specific function prefix */ +#define TYPE hash_ip + +static bool +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ip4_same_set hash_ip_same_set +#define hash_ip6_same_set hash_ip_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ip4_elem { + __be32 ip; +}; + +/* Member elements with timeout support */ +struct hash_ip4_telem { + __be32 ip; + unsigned long timeout; +}; + +static inline bool +hash_ip4_data_equal(const struct hash_ip4_elem *ip1, + const struct hash_ip4_elem *ip2) +{ + return ip1->ip == ip2->ip; +} + +static inline bool +hash_ip4_data_isnull(const struct hash_ip4_elem *elem) +{ + return elem->ip == 0; +} + +static inline void +hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src) +{ + dst->ip = src->ip; +} + +/* Zero valued IP addresses cannot be stored */ +static inline void +hash_ip4_data_zero_out(struct hash_ip4_elem *elem) +{ + elem->ip = 0; +} + +static inline bool +hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data) +{ + const struct hash_ip4_telem *tdata = + (const struct hash_ip4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_NETMASK +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + __be32 ip; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip); + ip &= ip_set_netmask(h->netmask); + if (ip == 0) + return -EINVAL; + + return adtfn(set, &ip, h->timeout); +} + +static int +hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 ip, ip_to, hosts, timeout = h->timeout; + __be32 nip; + int ret = 0; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ip &= ip_set_hostmask(h->netmask); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST) { + nip = htonl(ip); + if (nip == 0) + return -IPSET_ERR_HASH_ELEM; + return adtfn(set, &nip, timeout); + } + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + + for (; !before(ip_to, ip); ip += hosts) { + nip = htonl(ip); + if (nip == 0) + return -IPSET_ERR_HASH_ELEM; + ret = adtfn(set, &nip, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout && + x->netmask == y->netmask; +} + +/* The type variant functions: IPv6 */ + +struct hash_ip6_elem { + union nf_inet_addr ip; +}; + +struct hash_ip6_telem { + union nf_inet_addr ip; + unsigned long timeout; +}; + +static inline bool +hash_ip6_data_equal(const struct hash_ip6_elem *ip1, + const struct hash_ip6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; +} + +static inline bool +hash_ip6_data_isnull(const struct hash_ip6_elem *elem) +{ + return ipv6_addr_any(&elem->ip.in6); +} + +static inline void +hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src) +{ + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); +} + +static inline void +hash_ip6_data_zero_out(struct hash_ip6_elem *elem) +{ + ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static bool +hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data) +{ + const struct hash_ip6_telem *e = + (const struct hash_ip6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + union nf_inet_addr ip; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6); + ip6_netmask(&ip, h->netmask); + if (ipv6_addr_any(&ip.in6)) + return -EINVAL; + + return adtfn(set, &ip, h->timeout); +} + +static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, +}; + +static int +hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + union nf_inet_addr ip; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ip6_netmask(&ip, h->netmask); + if (ipv6_addr_any(&ip.in6)) + return -IPSET_ERR_HASH_ELEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &ip, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 netmask, hbits; + struct ip_set_hash *h; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + netmask = set->family == AF_INET ? 32 : 128; + pr_debug("Create set %s with family %s\n", + set->name, set->family == AF_INET ? "inet" : "inet6"); + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if ((set->family == AF_INET && netmask > 32) || + (set->family == AF_INET6 && netmask > 128) || + netmask == 0) + return -IPSET_ERR_INVALID_NETMASK; + } + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + h->netmask = netmask; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ip4_tvariant : &hash_ip6_tvariant; + + if (set->family == AF_INET) + hash_ip4_gc_init(set); + else + hash_ip6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ip4_variant : &hash_ip6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ip_type __read_mostly = { + .name = "hash:ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ip_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ip_init(void) +{ + return ip_set_type_register(&hash_ip_type); +} + +static void __exit +hash_ip_fini(void) +{ + ip_set_type_unregister(&hash_ip_type); +} + +module_init(hash_ip_init); +module_exit(hash_ip_fini); -- cgit v1.2.3-59-g8ed1b From 07896ed37b94599a1b8ea97f4bd5766be71390f4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:39:52 +0100 Subject: netfilter: ipset: hash:ip,port set type support The module implements the hash:ip,port type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are two dimensional: IPv4/IPv6 address and protocol/port pairs. The port is interpeted for TCP, UPD, ICMP and ICMPv6 (at the latters as type/code of course). Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipport.c | 547 +++++++++++++++++++++++++++++++ 3 files changed, 557 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ipport.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 194d89caeb3d..325b5312a47d 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -60,4 +60,13 @@ config IP_SET_HASH_IP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPPORT + tristate "hash:ip,port set support" + depends on IP_SET + help + This option adds the hash:ip,port set type support, by which one + can store IPv4/IPv6 address and protocol/port pairs. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 5cbf00c41a98..6a3663e5cef1 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o +obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c new file mode 100644 index 000000000000..d9b192818e58 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -0,0 +1,547 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port"); + +/* Type specific function prefix */ +#define TYPE hash_ipport + +static bool +hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipport4_same_set hash_ipport_same_set +#define hash_ipport6_same_set hash_ipport_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipport4_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 padding; +}; + +/* Member elements with timeout support */ +struct hash_ipport4_telem { + __be32 ip; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, + const struct hash_ipport4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipport4_data_copy(struct hash_ipport4_elem *dst, + const struct hash_ipport4_elem *src) +{ + dst->ip = src->ip; + dst->port = src->port; + dst->proto = src->proto; +} + +static inline void +hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipport4_data_list(struct sk_buff *skb, + const struct hash_ipport4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipport4_data_tlist(struct sk_buff *skb, + const struct hash_ipport4_elem *data) +{ + const struct hash_ipport4_telem *tdata = + (const struct hash_ipport4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem data = { }; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem data = { }; + u32 ip, ip_to, p, port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || + tb[IPSET_ATTR_PORT_TO])) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip = ntohl(data.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + port = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } else + port_to = port; + + for (; !before(ip_to, ip); ip++) + for (p = port; p <= port_to; p++) { + data.ip = htonl(ip); + data.port = htons(p); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_ipport6_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 padding; +}; + +struct hash_ipport6_telem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, + const struct hash_ipport6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipport6_data_copy(struct hash_ipport6_elem *dst, + const struct hash_ipport6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipport6_data_list(struct sk_buff *skb, + const struct hash_ipport6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipport6_data_tlist(struct sk_buff *skb, + const struct hash_ipport6_elem *data) +{ + const struct hash_ipport6_telem *e = + (const struct hash_ipport6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport6_elem data = { }; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport6_elem data = { }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipport4_tvariant : &hash_ipport6_tvariant; + + if (set->family == AF_INET) + hash_ipport4_gc_init(set); + else + hash_ipport6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipport4_variant : &hash_ipport6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ipport_type __read_mostly = { + .name = "hash:ip,port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipport_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipport_init(void) +{ + return ip_set_type_register(&hash_ipport_type); +} + +static void __exit +hash_ipport_fini(void) +{ + ip_set_type_unregister(&hash_ipport_type); +} + +module_init(hash_ipport_init); +module_exit(hash_ipport_fini); -- cgit v1.2.3-59-g8ed1b From 5663bc30e6114b6ba88cc428619ede46a3246a7b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:41:26 +0100 Subject: netfilter: ipset: hash:ip,port,ip set type support The module implements the hash:ip,port,ip type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are three dimensional: IPv4/IPv6 address, protocol/port and IPv4/IPv6 address triples. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipportip.c | 565 +++++++++++++++++++++++++++++ 3 files changed, 576 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ipportip.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 325b5312a47d..e69355334252 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -69,4 +69,14 @@ config IP_SET_HASH_IPPORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPPORTIP + tristate "hash:ip,port,ip set support" + depends on IP_SET + help + This option adds the hash:ip,port,ip set type support, by which + one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6 + address triples in a set. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 6a3663e5cef1..e9ddb25189d0 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o +obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c new file mode 100644 index 000000000000..80dae9de5d18 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -0,0 +1,565 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port,ip type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port,ip"); + +/* Type specific function prefix */ +#define TYPE hash_ipportip + +static bool +hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipportip4_same_set hash_ipportip_same_set +#define hash_ipportip6_same_set hash_ipportip_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipportip4_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 proto; + u8 padding; +}; + +/* Member elements with timeout support */ +struct hash_ipportip4_telem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, + const struct hash_ipportip4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->ip2 == ip2->ip2 && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst, + const struct hash_ipportip4_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipportip4_data_list(struct sk_buff *skb, + const struct hash_ipportip4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportip4_data_tlist(struct sk_buff *skb, + const struct hash_ipportip4_elem *data) +{ + const struct hash_ipportip4_telem *tdata = + (const struct hash_ipportip4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem data = { }; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem data = { }; + u32 ip, ip_to, p, port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || + tb[IPSET_ATTR_PORT_TO])) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip = ntohl(data.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + port = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } else + port_to = port; + + for (; !before(ip_to, ip); ip++) + for (p = port; p <= port_to; p++) { + data.ip = htonl(ip); + data.port = htons(p); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_ipportip6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 proto; + u8 padding; +}; + +struct hash_ipportip6_telem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, + const struct hash_ipportip6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst, + const struct hash_ipportip6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipportip6_data_list(struct sk_buff *skb, + const struct hash_ipportip6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportip6_data_tlist(struct sk_buff *skb, + const struct hash_ipportip6_elem *data) +{ + const struct hash_ipportip6_telem *e = + (const struct hash_ipportip6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip6_elem data = { }; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip6_elem data = { }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant; + + if (set->family == AF_INET) + hash_ipportip4_gc_init(set); + else + hash_ipportip6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipportip4_variant : &hash_ipportip6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ipportip_type __read_mostly = { + .name = "hash:ip,port,ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .dimension = IPSET_DIM_THREE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipportip_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipportip_init(void) +{ + return ip_set_type_register(&hash_ipportip_type); +} + +static void __exit +hash_ipportip_fini(void) +{ + ip_set_type_unregister(&hash_ipportip_type); +} + +module_init(hash_ipportip_init); +module_exit(hash_ipportip_fini); -- cgit v1.2.3-59-g8ed1b From 41d22f7b2e48c77175b62cec3797d7d7173a626e Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:51:00 +0100 Subject: netfilter: ipset: hash:ip,port,net set type support The module implements the hash:ip,port,net type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are three dimensional: IPv4/IPv6 address, protocol/port and IPv4/IPv6 network address/prefix triples. The different prefixes are searched/matched from the longest prefix to the shortes one (most specific to least). In other words the processing time linearly grows with the number of different prefixes in the set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipportnet.c | 631 ++++++++++++++++++++++++++++ 3 files changed, 642 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ipportnet.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index e69355334252..e2fbaa9d902a 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -79,4 +79,14 @@ config IP_SET_HASH_IPPORTIP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPPORTNET + tristate "hash:ip,port,net set support" + depends on IP_SET + help + This option adds the hash:ip,port,net set type support, by which + one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6 + network address/prefix triples in a set. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index e9ddb25189d0..9c5d857511c4 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o +obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c new file mode 100644 index 000000000000..8eacd8a46c1c --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -0,0 +1,631 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port,net type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port,net"); + +/* Type specific function prefix */ +#define TYPE hash_ipportnet + +static bool +hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipportnet4_same_set hash_ipportnet_same_set +#define hash_ipportnet6_same_set hash_ipportnet_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipportnet4_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 cidr; + u8 proto; +}; + +/* Member elements with timeout support */ +struct hash_ipportnet4_telem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 cidr; + u8 proto; + unsigned long timeout; +}; + +static inline bool +hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, + const struct hash_ipportnet4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->ip2 == ip2->ip2 && + ip1->cidr == ip2->cidr && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst, + const struct hash_ipportnet4_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr) +{ + elem->ip2 &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +static inline void +hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipportnet4_data_list(struct sk_buff *skb, + const struct hash_ipportnet4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportnet4_data_tlist(struct sk_buff *skb, + const struct hash_ipportnet4_elem *data) +{ + const struct hash_ipportnet4_telem *tdata = + (const struct hash_ipportnet4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem data = + { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + data.ip2 &= ip_set_netmask(data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; + u32 ip, ip_to, p, port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR2]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + data.ip2 &= ip_set_netmask(data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || + tb[IPSET_ATTR_PORT_TO])) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip = ntohl(data.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + port = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } else + port_to = port; + + for (; !before(ip_to, ip); ip++) + for (p = port; p <= port_to; p++) { + data.ip = htonl(ip); + data.port = htons(p); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_ipportnet6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 cidr; + u8 proto; +}; + +struct hash_ipportnet6_telem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 cidr; + u8 proto; + unsigned long timeout; +}; + +static inline bool +hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, + const struct hash_ipportnet6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + ip1->cidr == ip2->cidr && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst, + const struct hash_ipportnet6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem) +{ + elem->proto = 0; +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip2, cidr); + elem->cidr = cidr; +} + +static bool +hash_ipportnet6_data_list(struct sk_buff *skb, + const struct hash_ipportnet6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportnet6_data_tlist(struct sk_buff *skb, + const struct hash_ipportnet6_elem *data) +{ + const struct hash_ipportnet6_telem *e = + (const struct hash_ipportnet6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet6_elem data = + { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + ip6_netmask(&data.ip2, data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR2]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&data.ip2, data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipportnet4_tvariant + : &hash_ipportnet6_tvariant; + + if (set->family == AF_INET) + hash_ipportnet4_gc_init(set); + else + hash_ipportnet6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipportnet4_variant : &hash_ipportnet6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ipportnet_type __read_mostly = { + .name = "hash:ip,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .dimension = IPSET_DIM_THREE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipportnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipportnet_init(void) +{ + return ip_set_type_register(&hash_ipportnet_type); +} + +static void __exit +hash_ipportnet_fini(void) +{ + ip_set_type_unregister(&hash_ipportnet_type); +} + +module_init(hash_ipportnet_init); +module_exit(hash_ipportnet_fini); -- cgit v1.2.3-59-g8ed1b From b38370299eeaba4cf8a9e0c5c6acc2a1e049be23 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:52:54 +0100 Subject: netfilter: ipset: hash:net set type support The module implements the hash:net type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are one dimensional: IPv4/IPv6 network address/prefixes. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_net.c | 461 ++++++++++++++++++++++++++++++++++ 3 files changed, 471 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_net.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index e2fbaa9d902a..8d85de00e9e0 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -89,4 +89,13 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NET + tristate "hash:net set support" + depends on IP_SET + help + This option adds the hash:net set type support, by which + one can store IPv4/IPv6 network address/prefix elements in a set. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 9c5d857511c4..fd5eeb6a7e5b 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o +obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c new file mode 100644 index 000000000000..fb0e6a68d13e --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -0,0 +1,461 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:net type of IP sets"); +MODULE_ALIAS("ip_set_hash:net"); + +/* Type specific function prefix */ +#define TYPE hash_net + +static bool +hash_net_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_net4_same_set hash_net_same_set +#define hash_net6_same_set hash_net_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_net4_elem { + __be32 ip; + u16 padding0; + u8 padding1; + u8 cidr; +}; + +/* Member elements with timeout support */ +struct hash_net4_telem { + __be32 ip; + u16 padding0; + u8 padding1; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_net4_data_equal(const struct hash_net4_elem *ip1, + const struct hash_net4_elem *ip2) +{ + return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr; +} + +static inline bool +hash_net4_data_isnull(const struct hash_net4_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_net4_data_copy(struct hash_net4_elem *dst, + const struct hash_net4_elem *src) +{ + dst->ip = src->ip; + dst->cidr = src->cidr; +} + +static inline void +hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr) +{ + elem->ip &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +/* Zero CIDR values cannot be stored */ +static inline void +hash_net4_data_zero_out(struct hash_net4_elem *elem) +{ + elem->cidr = 0; +} + +static bool +hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) +{ + const struct hash_net4_telem *tdata = + (const struct hash_net4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= ip_set_netmask(data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + data.ip &= ip_set_netmask(data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &data, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +static bool +hash_net_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_net6_elem { + union nf_inet_addr ip; + u16 padding0; + u8 padding1; + u8 cidr; +}; + +struct hash_net6_telem { + union nf_inet_addr ip; + u16 padding0; + u8 padding1; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_net6_data_equal(const struct hash_net6_elem *ip1, + const struct hash_net6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->cidr == ip2->cidr; +} + +static inline bool +hash_net6_data_isnull(const struct hash_net6_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_net6_data_copy(struct hash_net6_elem *dst, + const struct hash_net6_elem *src) +{ + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + dst->cidr = src->cidr; +} + +static inline void +hash_net6_data_zero_out(struct hash_net6_elem *elem) +{ + elem->cidr = 0; +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} + +static bool +hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) +{ + const struct hash_net6_telem *e = + (const struct hash_net6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net6_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &data, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + struct ip_set_hash *h; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_net4_tvariant : &hash_net6_tvariant; + + if (set->family == AF_INET) + hash_net4_gc_init(set); + else + hash_net6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_net4_variant : &hash_net6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_net_type __read_mostly = { + .name = "hash:net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_net_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_net_init(void) +{ + return ip_set_type_register(&hash_net_type); +} + +static void __exit +hash_net_fini(void) +{ + ip_set_type_unregister(&hash_net_type); +} + +module_init(hash_net_init); +module_exit(hash_net_fini); -- cgit v1.2.3-59-g8ed1b From 21f45020a3084f80fcdd5f056a0c6389f5406399 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:53:55 +0100 Subject: netfilter: ipset: hash:net,port set type support The module implements the hash:net,port type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are two dimensional: IPv4/IPv6 network address/prefix and protocol/port pairs. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_netport.c | 581 ++++++++++++++++++++++++++++++ 3 files changed, 592 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_netport.c diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 8d85de00e9e0..2512e7b82d12 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -98,4 +98,14 @@ config IP_SET_HASH_NET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETPORT + tristate "hash:net,port set support" + depends on IP_SET + help + This option adds the hash:net,port set type support, by which + one can store IPv4/IPv6 network address/prefix and + protocol/port pairs as elements in a set. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index fd5eeb6a7e5b..fbbebd62bb2a 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o +obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c new file mode 100644 index 000000000000..342250f9b114 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -0,0 +1,581 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net,port type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:net,port type of IP sets"); +MODULE_ALIAS("ip_set_hash:net,port"); + +/* Type specific function prefix */ +#define TYPE hash_netport + +static bool +hash_netport_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_netport4_same_set hash_netport_same_set +#define hash_netport6_same_set hash_netport_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_netport4_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 cidr; +}; + +/* Member elements with timeout support */ +struct hash_netport4_telem { + __be32 ip; + __be16 port; + u8 proto; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_netport4_data_equal(const struct hash_netport4_elem *ip1, + const struct hash_netport4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->port == ip2->port && + ip1->proto == ip2->proto && + ip1->cidr == ip2->cidr; +} + +static inline bool +hash_netport4_data_isnull(const struct hash_netport4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_netport4_data_copy(struct hash_netport4_elem *dst, + const struct hash_netport4_elem *src) +{ + dst->ip = src->ip; + dst->port = src->port; + dst->proto = src->proto; + dst->cidr = src->cidr; +} + +static inline void +hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr) +{ + elem->ip &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +static inline void +hash_netport4_data_zero_out(struct hash_netport4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_netport4_data_list(struct sk_buff *skb, + const struct hash_netport4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netport4_data_tlist(struct sk_buff *skb, + const struct hash_netport4_elem *data) +{ + const struct hash_netport4_telem *tdata = + (const struct hash_netport4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem data = { + .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= ip_set_netmask(data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem data = { .cidr = HOST_MASK }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + data.ip &= ip_set_netmask(data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_netport_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_netport6_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 cidr; +}; + +struct hash_netport6_telem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_netport6_data_equal(const struct hash_netport6_elem *ip1, + const struct hash_netport6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->port == ip2->port && + ip1->proto == ip2->proto && + ip1->cidr == ip2->cidr; +} + +static inline bool +hash_netport6_data_isnull(const struct hash_netport6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_netport6_data_copy(struct hash_netport6_elem *dst, + const struct hash_netport6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_netport6_data_zero_out(struct hash_netport6_elem *elem) +{ + elem->proto = 0; +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} + +static bool +hash_netport6_data_list(struct sk_buff *skb, + const struct hash_netport6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netport6_data_tlist(struct sk_buff *skb, + const struct hash_netport6_elem *data) +{ + const struct hash_netport6_telem *e = + (const struct hash_netport6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport6_elem data = { + .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport6_elem data = { .cidr = HOST_MASK }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_netport4_tvariant : &hash_netport6_tvariant; + + if (set->family == AF_INET) + hash_netport4_gc_init(set); + else + hash_netport6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_netport4_variant : &hash_netport6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_netport_type __read_mostly = { + .name = "hash:net,port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_netport_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netport_init(void) +{ + return ip_set_type_register(&hash_netport_type); +} + +static void __exit +hash_netport_fini(void) +{ + ip_set_type_unregister(&hash_netport_type); +} + +module_init(hash_netport_init); +module_exit(hash_netport_fini); -- cgit v1.2.3-59-g8ed1b From f830837f0eed0f9e371b8fd65169365780814bb1 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:54:59 +0100 Subject: netfilter: ipset: list:set set type support The module implements the list:set type support in two flavours: without and with timeout. The sets has two sides: for the userspace, they store the names of other (non list:set type of) sets: one can add, delete and test set names. For the kernel, it forms an ordered union of the member sets: the members sets are tried in order when elements are added, deleted and tested and the process stops at the first success. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_list.h | 27 ++ net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 3 + net/netfilter/ipset/ip_set_list_set.c | 584 ++++++++++++++++++++++++++++ 4 files changed, 624 insertions(+) create mode 100644 include/linux/netfilter/ipset/ip_set_list.h create mode 100644 net/netfilter/ipset/ip_set_list_set.c diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h new file mode 100644 index 000000000000..40a63f302613 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_list.h @@ -0,0 +1,27 @@ +#ifndef __IP_SET_LIST_H +#define __IP_SET_LIST_H + +/* List type specific error codes */ +enum { + /* Set name to be added/deleted/tested does not exist. */ + IPSET_ERR_NAME = IPSET_ERR_TYPE_SPECIFIC, + /* list:set type is not permitted to add */ + IPSET_ERR_LOOP, + /* Missing reference set */ + IPSET_ERR_BEFORE, + /* Reference set does not exist */ + IPSET_ERR_NAMEREF, + /* Set is full */ + IPSET_ERR_LIST_FULL, + /* Reference set is not added to the set */ + IPSET_ERR_REF_EXIST, +}; + +#ifdef __KERNEL__ + +#define IP_SET_LIST_DEFAULT_SIZE 8 +#define IP_SET_LIST_MIN_SIZE 4 + +#endif /* __KERNEL__ */ + +#endif /* __IP_SET_LIST_H */ diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 2512e7b82d12..3b970d343023 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -108,4 +108,14 @@ config IP_SET_HASH_NETPORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_LIST_SET + tristate "list:set set support" + depends on IP_SET + help + This option adds the list:set set type support. In this + kind of set one can store the name of other sets and it forms + an ordered union of the member sets. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index fbbebd62bb2a..5adbdab67bd2 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -19,3 +19,6 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o + +# list types +obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c new file mode 100644 index 000000000000..a47c32982f06 --- /dev/null +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -0,0 +1,584 @@ +/* Copyright (C) 2008-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the list:set type */ + +#include +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("list:set type of IP sets"); +MODULE_ALIAS("ip_set_list:set"); + +/* Member elements without and with timeout */ +struct set_elem { + ip_set_id_t id; +}; + +struct set_telem { + ip_set_id_t id; + unsigned long timeout; +}; + +/* Type structure */ +struct list_set { + size_t dsize; /* element size */ + u32 size; /* size of set list array */ + u32 timeout; /* timeout value */ + struct timer_list gc; /* garbage collection */ + struct set_elem members[0]; /* the set members */ +}; + +static inline struct set_elem * +list_set_elem(const struct list_set *map, u32 id) +{ + return (struct set_elem *)((char *)map->members + id * map->dsize); +} + +static inline bool +list_set_timeout(const struct list_set *map, u32 id) +{ + const struct set_telem *elem = + (const struct set_telem *) list_set_elem(map, id); + + return ip_set_timeout_test(elem->timeout); +} + +static inline bool +list_set_expired(const struct list_set *map, u32 id) +{ + const struct set_telem *elem = + (const struct set_telem *) list_set_elem(map, id); + + return ip_set_timeout_expired(elem->timeout); +} + +static inline int +list_set_exist(const struct set_telem *elem) +{ + return elem->id != IPSET_INVALID_ID && + !ip_set_timeout_expired(elem->timeout); +} + +/* Set list without and with timeout */ + +static int +list_set_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct list_set *map = set->data; + struct set_elem *elem; + u32 i; + int ret; + + for (i = 0; i < map->size; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) + return 0; + if (with_timeout(map->timeout) && list_set_expired(map, i)) + continue; + switch (adt) { + case IPSET_TEST: + ret = ip_set_test(elem->id, skb, pf, dim, flags); + if (ret > 0) + return ret; + break; + case IPSET_ADD: + ret = ip_set_add(elem->id, skb, pf, dim, flags); + if (ret == 0) + return ret; + break; + case IPSET_DEL: + ret = ip_set_del(elem->id, skb, pf, dim, flags); + if (ret == 0) + return ret; + break; + default: + break; + } + } + return -EINVAL; +} + +static bool +next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +{ + const struct set_elem *elem; + + if (i + 1 < map->size) { + elem = list_set_elem(map, i + 1); + return !!(elem->id == id && + !(with_timeout(map->timeout) && + list_set_expired(map, i + 1))); + } + + return 0; +} + +static void +list_elem_add(struct list_set *map, u32 i, ip_set_id_t id) +{ + struct set_elem *e; + + for (; i < map->size; i++) { + e = list_set_elem(map, i); + swap(e->id, id); + if (e->id == IPSET_INVALID_ID) + break; + } +} + +static void +list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, + unsigned long timeout) +{ + struct set_telem *e; + + for (; i < map->size; i++) { + e = (struct set_telem *)list_set_elem(map, i); + swap(e->id, id); + if (e->id == IPSET_INVALID_ID) + break; + swap(e->timeout, timeout); + } +} + +static int +list_set_add(struct list_set *map, u32 i, ip_set_id_t id, + unsigned long timeout) +{ + const struct set_elem *e = list_set_elem(map, i); + + if (i == map->size - 1 && e->id != IPSET_INVALID_ID) + /* Last element replaced: e.g. add new,before,last */ + ip_set_put_byindex(e->id); + if (with_timeout(map->timeout)) + list_elem_tadd(map, i, id, timeout); + else + list_elem_add(map, i, id); + + return 0; +} + +static int +list_set_del(struct list_set *map, ip_set_id_t id, u32 i) +{ + struct set_elem *a = list_set_elem(map, i), *b; + + ip_set_put_byindex(id); + + for (; i < map->size - 1; i++) { + b = list_set_elem(map, i + 1); + a->id = b->id; + if (with_timeout(map->timeout)) + ((struct set_telem *)a)->timeout = + ((struct set_telem *)b)->timeout; + a = b; + if (a->id == IPSET_INVALID_ID) + break; + } + /* Last element */ + a->id = IPSET_INVALID_ID; + return 0; +} + +static int +list_set_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct list_set *map = set->data; + bool with_timeout = with_timeout(map->timeout); + int before = 0; + u32 timeout = map->timeout; + ip_set_id_t id, refid = IPSET_INVALID_ID; + const struct set_elem *elem; + struct ip_set *s; + u32 i; + int ret = 0; + + if (unlikely(!tb[IPSET_ATTR_NAME] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + if (id == IPSET_INVALID_ID) + return -IPSET_ERR_NAME; + /* "Loop detection" */ + if (s->type->features & IPSET_TYPE_NAME) { + ret = -IPSET_ERR_LOOP; + goto finish; + } + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + before = f & IPSET_FLAG_BEFORE; + } + + if (before && !tb[IPSET_ATTR_NAMEREF]) { + ret = -IPSET_ERR_BEFORE; + goto finish; + } + + if (tb[IPSET_ATTR_NAMEREF]) { + refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + &s); + if (refid == IPSET_INVALID_ID) { + ret = -IPSET_ERR_NAMEREF; + goto finish; + } + if (!before) + before = -1; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout) { + ret = -IPSET_ERR_TIMEOUT; + goto finish; + } + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + switch (adt) { + case IPSET_TEST: + for (i = 0; i < map->size && !ret; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID || + (before != 0 && i + 1 >= map->size)) + break; + else if (with_timeout && list_set_expired(map, i)) + continue; + else if (before > 0 && elem->id == id) + ret = next_id_eq(map, i, refid); + else if (before < 0 && elem->id == refid) + ret = next_id_eq(map, i, id); + else if (before == 0 && elem->id == id) + ret = 1; + } + break; + case IPSET_ADD: + for (i = 0; i < map->size && !ret; i++) { + elem = list_set_elem(map, i); + if (elem->id == id && + !(with_timeout && list_set_expired(map, i))) + ret = -IPSET_ERR_EXIST; + } + if (ret == -IPSET_ERR_EXIST) + break; + ret = -IPSET_ERR_LIST_FULL; + for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) + ret = before != 0 ? -IPSET_ERR_REF_EXIST + : list_set_add(map, i, id, timeout); + else if (elem->id != refid) + continue; + else if (with_timeout && list_set_expired(map, i)) + ret = -IPSET_ERR_REF_EXIST; + else if (before) + ret = list_set_add(map, i, id, timeout); + else if (i + 1 < map->size) + ret = list_set_add(map, i + 1, id, timeout); + } + break; + case IPSET_DEL: + ret = -IPSET_ERR_EXIST; + for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) { + ret = before != 0 ? -IPSET_ERR_REF_EXIST + : -IPSET_ERR_EXIST; + break; + } else if (with_timeout && list_set_expired(map, i)) + continue; + else if (elem->id == id && + (before == 0 || + (before > 0 && + next_id_eq(map, i, refid)))) + ret = list_set_del(map, id, i); + else if (before < 0 && + elem->id == refid && + next_id_eq(map, i, id)) + ret = list_set_del(map, id, i + 1); + } + break; + default: + break; + } + +finish: + if (refid != IPSET_INVALID_ID) + ip_set_put_byindex(refid); + if (adt != IPSET_ADD || ret) + ip_set_put_byindex(id); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +static void +list_set_flush(struct ip_set *set) +{ + struct list_set *map = set->data; + struct set_elem *elem; + u32 i; + + for (i = 0; i < map->size; i++) { + elem = list_set_elem(map, i); + if (elem->id != IPSET_INVALID_ID) { + ip_set_put_byindex(elem->id); + elem->id = IPSET_INVALID_ID; + } + } +} + +static void +list_set_destroy(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + list_set_flush(set); + kfree(map); + + set->data = NULL; +} + +static int +list_set_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct list_set *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->size * map->dsize)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static int +list_set_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct list_set *map = set->data; + struct nlattr *atd, *nested; + u32 i, first = cb->args[2]; + const struct set_elem *e; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] < map->size; cb->args[2]++) { + i = cb->args[2]; + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + goto finish; + if (with_timeout(map->timeout) && list_set_expired(map, i)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (i == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_STRING(skb, IPSET_ATTR_NAME, + ip_set_name_byindex(e->id)); + if (with_timeout(map->timeout)) { + const struct set_telem *te = + (const struct set_telem *) e; + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(te->timeout))); + } + ipset_nest_end(skb, nested); + } +finish: + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + if (unlikely(i == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static bool +list_set_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct list_set *x = a->data; + const struct list_set *y = b->data; + + return x->size == y->size && + x->timeout == y->timeout; +} + +static const struct ip_set_type_variant list_set = { + .kadt = list_set_kadt, + .uadt = list_set_uadt, + .destroy = list_set_destroy, + .flush = list_set_flush, + .head = list_set_head, + .list = list_set_list, + .same_set = list_set_same_set, +}; + +static void +list_set_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct list_set *map = set->data; + struct set_telem *e; + u32 i; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (i = map->size - 1; i >= 0; i--) { + e = (struct set_telem *) list_set_elem(map, i); + if (e->id != IPSET_INVALID_ID && + list_set_expired(map, i)) + list_set_del(map, e->id, i); + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +list_set_gc_init(struct ip_set *set) +{ + struct list_set *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = list_set_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create list:set type of sets */ + +static bool +init_list_set(struct ip_set *set, u32 size, size_t dsize, + unsigned long timeout) +{ + struct list_set *map; + struct set_elem *e; + u32 i; + + map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + if (!map) + return false; + + map->size = size; + map->dsize = dsize; + map->timeout = timeout; + set->data = map; + + for (i = 0; i < size; i++) { + e = list_set_elem(map, i); + e->id = IPSET_INVALID_ID; + } + + return true; +} + +static int +list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 size = IP_SET_LIST_DEFAULT_SIZE; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_SIZE]) + size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]); + if (size < IP_SET_LIST_MIN_SIZE) + size = IP_SET_LIST_MIN_SIZE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!init_list_set(set, size, sizeof(struct set_telem), + ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]))) + return -ENOMEM; + + list_set_gc_init(set); + } else { + if (!init_list_set(set, size, sizeof(struct set_elem), + IPSET_NO_TIMEOUT)) + return -ENOMEM; + } + set->variant = &list_set; + return 0; +} + +static struct ip_set_type list_set_type __read_mostly = { + .name = "list:set", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = list_set_create, + .create_policy = { + [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_NAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +list_set_init(void) +{ + return ip_set_type_register(&list_set_type); +} + +static void __exit +list_set_fini(void) +{ + ip_set_type_unregister(&list_set_type); +} + +module_init(list_set_init); +module_exit(list_set_fini); -- cgit v1.2.3-59-g8ed1b From d956798d82d2d331c031301965d69e17a1a48a2b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:56:00 +0100 Subject: netfilter: xtables: "set" match and "SET" target support The patch adds the combined module of the "SET" target and "set" match to netfilter. Both the previous and the current revisions are supported. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_set.h | 55 ++++++ net/netfilter/Kconfig | 12 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_set.c | 359 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 427 insertions(+) create mode 100644 include/linux/netfilter/xt_set.h create mode 100644 net/netfilter/xt_set.c diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h new file mode 100644 index 000000000000..69b2bd1fb818 --- /dev/null +++ b/include/linux/netfilter/xt_set.h @@ -0,0 +1,55 @@ +#ifndef _XT_SET_H +#define _XT_SET_H + +#include + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + +/* + * Option flags for kernel operations (xt_set_info_v0) + */ +#define IPSET_SRC 0x01 /* Source match/add */ +#define IPSET_DST 0x02 /* Destination match/add */ +#define IPSET_MATCH_INV 0x04 /* Inverse matching */ + +struct xt_set_info_v0 { + ip_set_id_t index; + union { + __u32 flags[IPSET_DIM_MAX + 1]; + struct { + __u32 __flags[IPSET_DIM_MAX]; + __u8 dim; + __u8 flags; + } compat; + } u; +}; + +/* match and target infos */ +struct xt_set_info_match_v0 { + struct xt_set_info_v0 match_set; +}; + +struct xt_set_info_target_v0 { + struct xt_set_info_v0 add_set; + struct xt_set_info_v0 del_set; +}; + +/* Revision 1: current interface to netfilter/iptables */ + +struct xt_set_info { + ip_set_id_t index; + __u8 dim; + __u8 flags; +}; + +/* match and target infos */ +struct xt_set_info_match { + struct xt_set_info match_set; +}; + +struct xt_set_info_target { + struct xt_set_info add_set; + struct xt_set_info del_set; +}; + +#endif /*_XT_SET_H*/ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 351abf8ace13..06fa9e4e45c7 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -352,6 +352,18 @@ config NETFILTER_XT_CONNMARK ctmark), similarly to the packet mark (nfmark). Using this target and match, you can set and match on this mark. +config NETFILTER_XT_SET + tristate 'set target and match support' + depends on IP_SET + depends on NETFILTER_ADVANCED + help + This option adds the "SET" target and "set" match. + + Using this target and match, you can add/delete and match + elements in the sets created by ipset(8). + + To compile it as a module, choose M here. If unsure, say N. + # alphabetically ordered list of targets comment "Xtables targets" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 510b586ccb7f..1148643559cb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # combos obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o +obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c new file mode 100644 index 000000000000..061d48cec137 --- /dev/null +++ b/net/netfilter/xt_set.c @@ -0,0 +1,359 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module which implements the set match and SET target + * for netfilter/iptables. */ + +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("Xtables: IP set match and target module"); +MODULE_ALIAS("xt_SET"); +MODULE_ALIAS("ipt_set"); +MODULE_ALIAS("ip6t_set"); +MODULE_ALIAS("ipt_SET"); +MODULE_ALIAS("ip6t_SET"); + +static inline int +match_set(ip_set_id_t index, const struct sk_buff *skb, + u8 pf, u8 dim, u8 flags, int inv) +{ + if (ip_set_test(index, skb, pf, dim, flags)) + inv = !inv; + return inv; +} + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + +static bool +set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v0 *info = par->matchinfo; + + return match_set(info->match_set.index, skb, par->family, + info->match_set.u.compat.dim, + info->match_set.u.compat.flags, + info->match_set.u.compat.flags & IPSET_INV_MATCH); +} + +static void +compat_flags(struct xt_set_info_v0 *info) +{ + u_int8_t i; + + /* Fill out compatibility data according to enum ip_set_kopt */ + info->u.compat.dim = IPSET_DIM_ZERO; + if (info->u.flags[0] & IPSET_MATCH_INV) + info->u.compat.flags |= IPSET_INV_MATCH; + for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + info->u.compat.dim++; + if (info->u.flags[i] & IPSET_SRC) + info->u.compat.flags |= (1<u.compat.dim); + } +} + +static int +set_match_v0_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v0 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + return -ERANGE; + } + + /* Fill out compatibility data */ + compat_flags(&info->match_set); + + return 0; +} + +static void +set_match_v0_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match_v0 *info = par->matchinfo; + + ip_set_nfnl_put(info->match_set.index); +} + +static unsigned int +set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target_v0 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par->family, + info->add_set.u.compat.dim, + info->add_set.u.compat.flags); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par->family, + info->del_set.u.compat.dim, + info->del_set.u.compat.flags); + + return XT_CONTINUE; +} + +static int +set_target_v0_checkentry(const struct xt_tgchk_param *par) +{ + struct xt_set_info_target_v0 *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find add_set index %u as target\n", + info->add_set.index); + return -ENOENT; + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find del_set index %u as target\n", + info->del_set.index); + return -ENOENT; + } + } + if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || + info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + pr_warning("Protocol error: SET target dimension " + "is over the limit!\n"); + return -ERANGE; + } + + /* Fill out compatibility data */ + compat_flags(&info->add_set); + compat_flags(&info->del_set); + + return 0; +} + +static void +set_target_v0_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target_v0 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); +} + +/* Revision 1: current interface to netfilter/iptables */ + +static bool +set_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match *info = par->matchinfo; + + return match_set(info->match_set.index, skb, par->family, + info->match_set.dim, + info->match_set.flags, + info->match_set.flags & IPSET_INV_MATCH); +} + +static int +set_match_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + return -ERANGE; + } + + return 0; +} + +static void +set_match_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + + ip_set_nfnl_put(info->match_set.index); +} + +static unsigned int +set_target(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, + skb, par->family, + info->add_set.dim, + info->add_set.flags); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, + skb, par->family, + info->add_set.dim, + info->del_set.flags); + + return XT_CONTINUE; +} + +static int +set_target_checkentry(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find add_set index %u as target\n", + info->add_set.index); + return -ENOENT; + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find del_set index %u as target\n", + info->del_set.index); + return -ENOENT; + } + } + if (info->add_set.dim > IPSET_DIM_MAX || + info->del_set.flags > IPSET_DIM_MAX) { + pr_warning("Protocol error: SET target dimension " + "is over the limit!\n"); + return -ERANGE; + } + + return 0; +} + +static void +set_target_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); +} + +static struct xt_match set_matches[] __read_mostly = { + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 0, + .match = set_match_v0, + .matchsize = sizeof(struct xt_set_info_match_v0), + .checkentry = set_match_v0_checkentry, + .destroy = set_match_v0_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 1, + .match = set_match, + .matchsize = sizeof(struct xt_set_info_match), + .checkentry = set_match_checkentry, + .destroy = set_match_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 1, + .match = set_match, + .matchsize = sizeof(struct xt_set_info_match), + .checkentry = set_match_checkentry, + .destroy = set_match_destroy, + .me = THIS_MODULE + }, +}; + +static struct xt_target set_targets[] __read_mostly = { + { + .name = "SET", + .revision = 0, + .family = NFPROTO_IPV4, + .target = set_target_v0, + .targetsize = sizeof(struct xt_set_info_target_v0), + .checkentry = set_target_v0_checkentry, + .destroy = set_target_v0_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 1, + .family = NFPROTO_IPV4, + .target = set_target, + .targetsize = sizeof(struct xt_set_info_target), + .checkentry = set_target_checkentry, + .destroy = set_target_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 1, + .family = NFPROTO_IPV6, + .target = set_target, + .targetsize = sizeof(struct xt_set_info_target), + .checkentry = set_target_checkentry, + .destroy = set_target_destroy, + .me = THIS_MODULE + }, +}; + +static int __init xt_set_init(void) +{ + int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches)); + + if (!ret) { + ret = xt_register_targets(set_targets, + ARRAY_SIZE(set_targets)); + if (ret) + xt_unregister_matches(set_matches, + ARRAY_SIZE(set_matches)); + } + return ret; +} + +static void __exit xt_set_fini(void) +{ + xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches)); + xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets)); +} + +module_init(xt_set_init); +module_exit(xt_set_fini); -- cgit v1.2.3-59-g8ed1b From 8da560ced56c423cd6d35803cd0244c944c676bd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 16:27:25 +0100 Subject: netfilter: ipset: use nla_parse_nested() Replace calls of the form: nla_parse(tb, ATTR_MAX, nla_data(attr), nla_len(attr), policy) by: nla_parse_nested(tb, ATTR_MAX, attr, policy) Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_core.c | 42 +++++++++++++++------------------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8a736247e85f..ae0f8b595106 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -246,8 +246,7 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), - ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -265,8 +264,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), - ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -666,10 +664,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && - nla_parse(tb, IPSET_ATTR_CREATE_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->create_policy)) { + nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], + set->type->create_policy)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1169,10 +1165,9 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->adt_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); } else { @@ -1182,9 +1177,8 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(nla), nla_len(nla), - set->type->adt_policy)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1224,10 +1218,9 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->adt_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); } else { @@ -1237,9 +1230,8 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(*tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(nla), nla_len(nla), - set->type->adt_policy)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1269,10 +1261,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, if (set == NULL) return -ENOENT; - if (nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->adt_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; read_lock_bh(&set->lock); -- cgit v1.2.3-59-g8ed1b From 582e1fc85ca3727abd4e99109a267c514ea5c260 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 16:57:37 +0100 Subject: netfilter: ipset: remove unnecessary includes None of the set types need uaccess.h since this is handled centrally in ip_set_core. Most set types additionally don't need bitops.h and spinlock.h since they use neither. tcp.h is only needed by those using before(), udp.h is not needed at all. Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_bitmap_ip.c | 1 - net/netfilter/ipset/ip_set_bitmap_ipmac.c | 3 --- net/netfilter/ipset/ip_set_bitmap_port.c | 5 ----- net/netfilter/ipset/ip_set_hash_ip.c | 3 --- net/netfilter/ipset/ip_set_hash_ipport.c | 3 --- net/netfilter/ipset/ip_set_hash_ipportip.c | 3 --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 3 --- net/netfilter/ipset/ip_set_hash_net.c | 3 --- net/netfilter/ipset/ip_set_hash_netport.c | 3 --- 9 files changed, 27 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 047440085509..bca96990218d 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index d826332d2937..5e790172deff 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -15,9 +15,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 92074bb64134..165f09b1a9cb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -9,13 +9,8 @@ #include #include -#include -#include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 53964bc831aa..43bcce200129 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index d9b192818e58..adbe787ea5dc 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 80dae9de5d18..22e23abb86c6 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 8eacd8a46c1c..6033e8b54bbd 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index fb0e6a68d13e..c4db202b7da4 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 342250f9b114..34a165626ee9 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include -- cgit v1.2.3-59-g8ed1b From a00f1f3686d6a062b5295c092a9dff059adbdbf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 17:26:37 +0100 Subject: netfilter: ctnetlink: fix ctnetlink_parse_tuple() warning net/netfilter/nf_conntrack_netlink.c: In function 'ctnetlink_parse_tuple': net/netfilter/nf_conntrack_netlink.c:832:11: warning: comparison between 'enum ctattr_tuple' and 'enum ctattr_type' Use ctattr_type for the 'type' parameter since that's the type of all attributes passed to this function. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 61c73945bb94..cb1a8190ba29 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -803,7 +803,7 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, - enum ctattr_tuple type, u_int8_t l3num) + enum ctattr_type type, u_int8_t l3num) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; -- cgit v1.2.3-59-g8ed1b From a870c8c5cbe41bcf42cf4fa9f43d969b5134090b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:21:53 +0100 Subject: IPVS: use z modifier for sizeof() argument Reported-by: Randy Dunlap Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d889f4f6be99..4d06617fab6c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1887,7 +1887,7 @@ static int __net_init __ip_vs_init(struct net *net) ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; } -- cgit v1.2.3-59-g8ed1b From 258e958b85cef23b1598515504426e8d0576d223 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:24:09 +0100 Subject: IPVS: remove duplicate initialisation or rs_table Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ctl.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 98df59a12453..d7c2fa80cee3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3515,9 +3515,6 @@ int __net_init __ip_vs_control_init(struct net *net) } spin_lock_init(&ipvs->tot_stats->lock); - for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); proc_net_fops_create(net, "ip_vs_stats_percpu", 0, -- cgit v1.2.3-59-g8ed1b From a13676476e289ba03a23e27df130c7f33ab00e2f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:27:51 +0100 Subject: IPVS: Remove unused variables These variables are unused as a result of the recent netns work. Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b23bea62f708..5d75feadf4f4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1109,8 +1109,6 @@ extern int ip_vs_icmp_xmit_v6 * we are loaded. Just set ip_vs_drop_rate to 'n' and * we start to drop 1/rate of the packets */ -extern int ip_vs_drop_rate; -extern int ip_vs_drop_counter; static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { -- cgit v1.2.3-59-g8ed1b From 0443929ff0ecc4d1e690edaffa338cabe0863d3b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:29:04 +0100 Subject: IPVS: Allow compilation with CONFIG_SYSCTL disabled This is a rather naieve approach to allowing PVS to compile with CONFIG_SYSCTL disabled. I am working on a more comprehensive patch which will remove compilation of all sysctl-related IPVS code when CONFIG_SYSCTL is disabled. Reported-by: Randy Dunlap Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ctl.c | 14 +++++++++----- net/netfilter/ipvs/ip_vs_lblc.c | 20 ++++++++++---------- net/netfilter/ipvs/ip_vs_lblcr.c | 20 ++++++++++---------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d7c2fa80cee3..c73b0c831a2d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3552,10 +3552,15 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; +#ifdef CONFIG_SYSCTL ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl); - if (ipvs->sysctl_hdr == NULL) - goto err_reg; + if (ipvs->sysctl_hdr == NULL) { + if (!net_eq(net, &init_net)) + kfree(tbl); + goto err_dup; + } +#endif ip_vs_new_estimator(net, ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ @@ -3563,9 +3568,6 @@ int __net_init __ip_vs_control_init(struct net *net) schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); return 0; -err_reg: - if (!net_eq(net, &init_net)) - kfree(tbl); err_dup: free_percpu(ipvs->cpustats); err_alloc: @@ -3581,7 +3583,9 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) ip_vs_kill_estimator(net, ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); +#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->sysctl_hdr); +#endif proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index d5bec3371871..00b5ffab3768 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -554,33 +554,33 @@ static int __net_init __ip_vs_lblc_init(struct net *net) sizeof(vs_vars_table), GFP_KERNEL); if (ipvs->lblc_ctl_table == NULL) - goto err_dup; + return -ENOMEM; } else ipvs->lblc_ctl_table = vs_vars_table; ipvs->sysctl_lblc_expiration = 24*60*60*HZ; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; +#ifdef CONFIG_SYSCTL ipvs->lblc_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblc_ctl_table); - if (!ipvs->lblc_ctl_header) - goto err_reg; + if (!ipvs->lblc_ctl_header) { + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); + return -ENOMEM; + } +#endif return 0; - -err_reg: - if (!net_eq(net, &init_net)) - kfree(ipvs->lblc_ctl_table); - -err_dup: - return -ENOMEM; } static void __net_exit __ip_vs_lblc_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); +#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblc_ctl_header); +#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 61ae8cfcf0b4..bfa25f1ea9e4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -754,33 +754,33 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) sizeof(vs_vars_table), GFP_KERNEL); if (ipvs->lblcr_ctl_table == NULL) - goto err_dup; + return -ENOMEM; } else ipvs->lblcr_ctl_table = vs_vars_table; ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; +#ifdef CONFIG_SYSCTL ipvs->lblcr_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblcr_ctl_table); - if (!ipvs->lblcr_ctl_header) - goto err_reg; + if (!ipvs->lblcr_ctl_header) { + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); + return -ENOMEM; + } +#endif return 0; - -err_reg: - if (!net_eq(net, &init_net)) - kfree(ipvs->lblcr_ctl_table); - -err_dup: - return -ENOMEM; } static void __net_exit __ip_vs_lblcr_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); +#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblcr_ctl_header); +#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); -- cgit v1.2.3-59-g8ed1b From ed3d1e7b72069a3463b7e227b18cae4a09b0ddad Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:30:26 +0100 Subject: IPVS: Remove ip_vs_sync_cleanup from section __exit ip_vs_sync_cleanup() may be called from ip_vs_init() on error and thus needs to be accesible from section __init Reporte-by: Randy Dunlap Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d5a6e640ea45..2a2a8363ca16 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1686,7 +1686,7 @@ int __init ip_vs_sync_init(void) return register_pernet_subsys(&ipvs_sync_ops); } -void __exit ip_vs_sync_cleanup(void) +void ip_vs_sync_cleanup(void) { unregister_pernet_subsys(&ipvs_sync_ops); } -- cgit v1.2.3-59-g8ed1b From e3e241b2769b27669d05f0a05083acd21b4faa2c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 18:52:42 +0100 Subject: netfilter: ipset: install ipset related header files Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 3 +++ include/linux/netfilter/ipset/Kbuild | 4 ++++ include/linux/netfilter/xt_set.h | 1 + 3 files changed, 8 insertions(+) create mode 100644 include/linux/netfilter/ipset/Kbuild diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 89c0d1e20d72..ba19544cce94 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -1,3 +1,5 @@ +header-y += ipset/ + header-y += nf_conntrack_common.h header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h @@ -55,6 +57,7 @@ header-y += xt_quota.h header-y += xt_rateest.h header-y += xt_realm.h header-y += xt_recent.h +header-y += xt_set.h header-y += xt_sctp.h header-y += xt_socket.h header-y += xt_state.h diff --git a/include/linux/netfilter/ipset/Kbuild b/include/linux/netfilter/ipset/Kbuild new file mode 100644 index 000000000000..601fe71d34d5 --- /dev/null +++ b/include/linux/netfilter/ipset/Kbuild @@ -0,0 +1,4 @@ +header-y += ip_set.h +header-y += ip_set_bitmap.h +header-y += ip_set_hash.h +header-y += ip_set_list.h diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h index 69b2bd1fb818..081f1ded2842 100644 --- a/include/linux/netfilter/xt_set.h +++ b/include/linux/netfilter/xt_set.h @@ -1,6 +1,7 @@ #ifndef _XT_SET_H #define _XT_SET_H +#include #include /* Revision 0 interface: backward compatible with netfilter/iptables */ -- cgit v1.2.3-59-g8ed1b From 316ed388802533bcfd3dffb38d2ba29ac5428456 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 2 Feb 2011 09:31:37 +0100 Subject: netfilter: ipset: add missing break statemtns in ip_set_get_ip_port() Don't fall through in the switch statement, otherwise IPv4 headers are incorrectly parsed again as IPv6 and the return value will always be 'false'. Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_getport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 76737bba28ee..4dd2785a5c72 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -118,8 +118,10 @@ ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) switch (pf) { case AF_INET: ret = ip_set_get_ip4_port(skb, src, port, &proto); + break; case AF_INET6: ret = ip_set_get_ip6_port(skb, src, port, &proto); + break; default: return false; } -- cgit v1.2.3-59-g8ed1b From 724bab476bcac9f7d0b5204cb06e346216d42166 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 2 Feb 2011 23:50:01 +0100 Subject: netfilter: ipset: fix linking with CONFIG_IPV6=n Add a dummy ip_set_get_ip6_port function that unconditionally returns false for CONFIG_IPV6=n and convert the real function to ipv6_skip_exthdr() to avoid pulling in the ip6_tables module when loading ipset. Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_getport.h | 10 ++++++++++ net/netfilter/ipset/ip_set_getport.c | 15 +++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index 694c433298b8..3882a81a3b3c 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -3,8 +3,18 @@ extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto); + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto); +#else +static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto) +{ + return false; +} +#endif + extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port); diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 4dd2785a5c72..8d5227212686 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -93,21 +94,23 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto) { - unsigned int protooff = 0; - int protocol; - unsigned short fragoff; + int protoff; + u8 nexthdr; - protocol = ipv6_find_hdr(skb, &protooff, -1, &fragoff); - if (protocol <= 0 || fragoff) + nexthdr = ipv6_hdr(skb)->nexthdr; + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); + if (protoff < 0) return false; - return get_port(skb, protocol, protooff, src, port, proto); + return get_port(skb, nexthdr, protoff, src, port, proto); } EXPORT_SYMBOL_GPL(ip_set_get_ip6_port); +#endif bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) -- cgit v1.2.3-59-g8ed1b From 5f52bc3cdd1bb2e12e61639df19d9dcd530c4568 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 2 Feb 2011 23:56:00 +0100 Subject: netfilter: ipset: send error message manually When a message carries multiple commands and one of them triggers an error, we have to report to the userspace which one was that. The line number of the command plays this role and there's an attribute reserved in the header part of the message to be filled out with the error line number. In order not to modify the original message received from the userspace, we construct a new, complete netlink error message and modifies the attribute there, then send it. Netlink is notified not to send its ACK/error message. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index ae0f8b595106..8b1a54c1e400 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1098,7 +1098,7 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -call_ad(struct sk_buff *skb, struct ip_set *set, +call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 flags, bool use_lineno) { @@ -1118,12 +1118,25 @@ call_ad(struct sk_buff *skb, struct ip_set *set, return 0; if (lineno && use_lineno) { /* Error in restore/batch mode: send back lineno */ - struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); + struct sk_buff *skb2; + struct nlmsgerr *errmsg; + size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; - struct nlattr *cmdattr = (void *)nlh + min_len; + struct nlattr *cmdattr; u32 *errline; + skb2 = nlmsg_new(payload, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); + errmsg = nlmsg_data(rep); + errmsg->error = ret; + memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); + cmdattr = (void *)&errmsg->msg + min_len; + nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, nlh->nlmsg_len - min_len, ip_set_adt_policy); @@ -1131,6 +1144,10 @@ call_ad(struct sk_buff *skb, struct ip_set *set, errline = nla_data(cda[IPSET_ATTR_LINENO]); *errline = lineno; + + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + /* Signal netlink not to send its ACK/errmsg. */ + return -EINTR; } return ret; @@ -1169,7 +1186,8 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_DATA], set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); + ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, + use_lineno); } else { int nla_rem; @@ -1180,7 +1198,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_ADD, + ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); if (ret < 0) return ret; @@ -1222,7 +1240,8 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_DATA], set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); + ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, + use_lineno); } else { int nla_rem; @@ -1233,7 +1252,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_DEL, + ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); if (ret < 0) return ret; -- cgit v1.2.3-59-g8ed1b From 9291747f118d6404e509747b85ff5f6dfec368d2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Feb 2011 00:05:43 +0100 Subject: netfilter: xtables: add device group match Add a new 'devgroup' match to match on the device group of the incoming and outgoing network device of a packet. Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_devgroup.h | 21 +++++++++ net/netfilter/Kconfig | 9 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_devgroup.c | 82 +++++++++++++++++++++++++++++++++++ 5 files changed, 114 insertions(+) create mode 100644 include/linux/netfilter/xt_devgroup.h create mode 100644 net/netfilter/xt_devgroup.c diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index ba19544cce94..15e83bf3dd58 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -37,6 +37,7 @@ header-y += xt_connmark.h header-y += xt_conntrack.h header-y += xt_cpu.h header-y += xt_dccp.h +header-y += xt_devgroup.h header-y += xt_dscp.h header-y += xt_esp.h header-y += xt_hashlimit.h diff --git a/include/linux/netfilter/xt_devgroup.h b/include/linux/netfilter/xt_devgroup.h new file mode 100644 index 000000000000..1babde0ec900 --- /dev/null +++ b/include/linux/netfilter/xt_devgroup.h @@ -0,0 +1,21 @@ +#ifndef _XT_DEVGROUP_H +#define _XT_DEVGROUP_H + +#include + +enum xt_devgroup_flags { + XT_DEVGROUP_MATCH_SRC = 0x1, + XT_DEVGROUP_INVERT_SRC = 0x2, + XT_DEVGROUP_MATCH_DST = 0x4, + XT_DEVGROUP_INVERT_DST = 0x8, +}; + +struct xt_devgroup_info { + __u32 flags; + __u32 src_group; + __u32 src_mask; + __u32 dst_group; + __u32 dst_mask; +}; + +#endif /* _XT_DEVGROUP_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 06fa9e4e45c7..82a6e0d80f05 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -738,6 +738,15 @@ config NETFILTER_XT_MATCH_DCCP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_DEVGROUP + tristate '"devgroup" match support' + depends on NETFILTER_ADVANCED + help + This options adds a `devgroup' match, which allows to match on the + device group a network device is assigned to. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DSCP tristate '"dscp" and "tos" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1148643559cb..d57a890eaee5 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c new file mode 100644 index 000000000000..d9202cdd25c9 --- /dev/null +++ b/net/netfilter/xt_devgroup.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: Device group match"); +MODULE_ALIAS("ipt_devgroup"); +MODULE_ALIAS("ip6t_devgroup"); + +static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & XT_DEVGROUP_MATCH_SRC && + (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ + ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) + return false; + + if (info->flags & XT_DEVGROUP_MATCH_DST && + (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ + ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) + return false; + + return true; +} + +static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | + XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) + return -EINVAL; + + if (info->flags & XT_DEVGROUP_MATCH_SRC && + par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD))) + return -EINVAL; + + if (info->flags & XT_DEVGROUP_MATCH_DST && + par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) + return -EINVAL; + + return 0; +} + +static struct xt_match devgroup_mt_reg __read_mostly = { + .name = "devgroup", + .match = devgroup_mt, + .checkentry = devgroup_mt_checkentry, + .matchsize = sizeof(struct xt_devgroup_info), + .family = NFPROTO_UNSPEC, + .me = THIS_MODULE +}; + +static int __init devgroup_mt_init(void) +{ + return xt_register_match(&devgroup_mt_reg); +} + +static void __exit devgroup_mt_exit(void) +{ + xt_unregister_match(&devgroup_mt_reg); +} + +module_init(devgroup_mt_init); +module_exit(devgroup_mt_exit); -- cgit v1.2.3-59-g8ed1b