aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/nft_set_pipapo.h
blob: 25a75591583ebec982a53a67135721c7f4902047 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
// SPDX-License-Identifier: GPL-2.0-only

#ifndef _NFT_SET_PIPAPO_H

#include <linux/log2.h>
#include <net/ipv6.h>			/* For the maximum length of a field */

/* Count of concatenated fields depends on count of 32-bit nftables registers */
#define NFT_PIPAPO_MAX_FIELDS		NFT_REG32_COUNT

/* Restrict usage to multiple fields, make sure rbtree is used otherwise */
#define NFT_PIPAPO_MIN_FIELDS		2

/* Largest supported field size */
#define NFT_PIPAPO_MAX_BYTES		(sizeof(struct in6_addr))
#define NFT_PIPAPO_MAX_BITS		(NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)

/* Bits to be grouped together in table buckets depending on set size */
#define NFT_PIPAPO_GROUP_BITS_INIT	NFT_PIPAPO_GROUP_BITS_SMALL_SET
#define NFT_PIPAPO_GROUP_BITS_SMALL_SET	8
#define NFT_PIPAPO_GROUP_BITS_LARGE_SET	4
#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4				\
	BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) ||		\
		     (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
#define NFT_PIPAPO_GROUPS_PER_BYTE(f)	(BITS_PER_BYTE / (f)->bb)

/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
 * small group width, and switch to the big group width if the table gets
 * smaller than NFT_PIPAPO_LT_SIZE_LOW.
 *
 * Picking 2MiB as threshold (for a single table) avoids as much as possible
 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
 * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
 */
#define NFT_PIPAPO_LT_SIZE_THRESHOLD	(1 << 21)
#define NFT_PIPAPO_LT_SIZE_HYSTERESIS	(1 << 16)
#define NFT_PIPAPO_LT_SIZE_HIGH		NFT_PIPAPO_LT_SIZE_THRESHOLD
#define NFT_PIPAPO_LT_SIZE_LOW		NFT_PIPAPO_LT_SIZE_THRESHOLD -	\
					NFT_PIPAPO_LT_SIZE_HYSTERESIS

/* Fields are padded to 32 bits in input registers */
#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f)				\
	(round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
#define NFT_PIPAPO_GROUPS_PADDING(f)					\
	(NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups /		\
					    NFT_PIPAPO_GROUPS_PER_BYTE(f))

/* Number of buckets given by 2 ^ n, with n bucket bits */
#define NFT_PIPAPO_BUCKETS(bb)		(1 << (bb))

/* Each n-bit range maps to up to n * 2 rules */
#define NFT_PIPAPO_MAP_NBITS		(const_ilog2(NFT_PIPAPO_MAX_BITS * 2))

/* Use the rest of mapping table buckets for rule indices, but it makes no sense
 * to exceed 32 bits
 */
#if BITS_PER_LONG == 64
#define NFT_PIPAPO_MAP_TOBITS		32
#else
#define NFT_PIPAPO_MAP_TOBITS		(BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
#endif

/* ...which gives us the highest allowed index for a rule */
#define NFT_PIPAPO_RULE0_MAX		((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
					- (1UL << NFT_PIPAPO_MAP_NBITS))

/* Definitions for vectorised implementations */
#ifdef NFT_PIPAPO_ALIGN
#define NFT_PIPAPO_ALIGN_HEADROOM					\
	(NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
#define NFT_PIPAPO_LT_ALIGN(lt)		(PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
#define NFT_PIPAPO_LT_ASSIGN(field, x)					\
	do {								\
		(field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x);		\
		(field)->lt = (x);					\
	} while (0)
#else
#define NFT_PIPAPO_ALIGN_HEADROOM	0
#define NFT_PIPAPO_LT_ALIGN(lt)		(lt)
#define NFT_PIPAPO_LT_ASSIGN(field, x)	((field)->lt = (x))
#endif /* NFT_PIPAPO_ALIGN */

#define nft_pipapo_for_each_field(field, index, match)		\
	for ((field) = (match)->f, (index) = 0;			\
	     (index) < (match)->field_count;			\
	     (index)++, (field)++)

/**
 * union nft_pipapo_map_bucket - Bucket of mapping table
 * @to:		First rule number (in next field) this rule maps to
 * @n:		Number of rules (in next field) this rule maps to
 * @e:		If there's no next field, pointer to element this rule maps to
 */
union nft_pipapo_map_bucket {
	struct {
#if BITS_PER_LONG == 64
		static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
		u32 to;

		static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
		u32 n;
#else
		unsigned long to:NFT_PIPAPO_MAP_TOBITS;
		unsigned long  n:NFT_PIPAPO_MAP_NBITS;
#endif
	};
	struct nft_pipapo_elem *e;
};

/**
 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
 * @groups:	Amount of bit groups
 * @rules:	Number of inserted rules
 * @bsize:	Size of each bucket in lookup table, in longs
 * @bb:		Number of bits grouped together in lookup table buckets
 * @lt:		Lookup table: 'groups' rows of buckets
 * @lt_aligned:	Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
 * @mt:		Mapping table: one bucket per rule
 */
struct nft_pipapo_field {
	int groups;
	unsigned long rules;
	size_t bsize;
	int bb;
#ifdef NFT_PIPAPO_ALIGN
	unsigned long *lt_aligned;
#endif
	unsigned long *lt;
	union nft_pipapo_map_bucket *mt;
};

/**
 * struct nft_pipapo_match - Data used for lookup and matching
 * @field_count		Amount of fields in set
 * @scratch:		Preallocated per-CPU maps for partial matching results
 * @scratch_aligned:	Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
 * @bsize_max:		Maximum lookup table bucket size of all fields, in longs
 * @rcu			Matching data is swapped on commits
 * @f:			Fields, with lookup and mapping tables
 */
struct nft_pipapo_match {
	int field_count;
#ifdef NFT_PIPAPO_ALIGN
	unsigned long * __percpu *scratch_aligned;
#endif
	unsigned long * __percpu *scratch;
	size_t bsize_max;
	struct rcu_head rcu;
	struct nft_pipapo_field f[];
};

/**
 * struct nft_pipapo - Representation of a set
 * @match:	Currently in-use matching data
 * @clone:	Copy where pending insertions and deletions are kept
 * @width:	Total bytes to be matched for one packet, including padding
 * @dirty:	Working copy has pending insertions or deletions
 * @last_gc:	Timestamp of last garbage collection run, jiffies
 */
struct nft_pipapo {
	struct nft_pipapo_match __rcu *match;
	struct nft_pipapo_match *clone;
	int width;
	bool dirty;
	unsigned long last_gc;
};

struct nft_pipapo_elem;

/**
 * struct nft_pipapo_elem - API-facing representation of single set element
 * @ext:	nftables API extensions
 */
struct nft_pipapo_elem {
	struct nft_set_ext ext;
};

int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
		  union nft_pipapo_map_bucket *mt, bool match_only);

/**
 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
 * @f:		Field including lookup table
 * @dst:	Area to store result
 * @data:	Input data selecting table buckets
 */
static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
						 unsigned long *dst,
						 const u8 *data)
{
	unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
	int group;

	for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
		u8 v;

		v = *data >> 4;
		__bitmap_and(dst, dst, lt + v * f->bsize,
			     f->bsize * BITS_PER_LONG);
		lt += f->bsize * NFT_PIPAPO_BUCKETS(4);

		v = *data & 0x0f;
		__bitmap_and(dst, dst, lt + v * f->bsize,
			     f->bsize * BITS_PER_LONG);
		lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
	}
}

/**
 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
 * @f:		Field including lookup table
 * @dst:	Area to store result
 * @data:	Input data selecting table buckets
 */
static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
						 unsigned long *dst,
						 const u8 *data)
{
	unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
	int group;

	for (group = 0; group < f->groups; group++, data++) {
		__bitmap_and(dst, dst, lt + *data * f->bsize,
			     f->bsize * BITS_PER_LONG);
		lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
	}
}

/**
 * pipapo_estimate_size() - Estimate worst-case for set size
 * @desc:	Set description, element count and field description used here
 *
 * The size for this set type can vary dramatically, as it depends on the number
 * of rules (composing netmasks) the entries expand to. We compute the worst
 * case here.
 *
 * In general, for a non-ranged entry or a single composing netmask, we need
 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
 * is, each input bit needs four bits of matching data), plus a bucket in the
 * mapping table for each field.
 *
 * Return: worst-case set size in bytes, 0 on any overflow
 */
static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
{
	unsigned long entry_size;
	u64 size;
	int i;

	for (i = 0, entry_size = 0; i < desc->field_count; i++) {
		unsigned long rules;

		if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
			return 0;

		/* Worst-case ranges for each concatenated field: each n-bit
		 * field can expand to up to n * 2 rules in each bucket, and
		 * each rule also needs a mapping bucket.
		 */
		rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
		entry_size += rules *
			      NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
			      BITS_PER_BYTE;
		entry_size += rules * sizeof(union nft_pipapo_map_bucket);
	}

	/* Rules in lookup and mapping tables are needed for each entry */
	size = desc->size * entry_size;
	if (size && div_u64(size, desc->size) != entry_size)
		return 0;

	size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;

	size += sizeof(struct nft_pipapo_field) * desc->field_count;

	return size;
}

#endif /* _NFT_SET_PIPAPO_H */