diff options
Diffstat (limited to 'lib')
91 files changed, 5880 insertions, 1931 deletions
diff --git a/lib/842/842.h b/lib/842/842.h new file mode 100644 index 000000000000..7c200030acf7 --- /dev/null +++ b/lib/842/842.h @@ -0,0 +1,127 @@ + +#ifndef __842_H__ +#define __842_H__ + +/* The 842 compressed format is made up of multiple blocks, each of + * which have the format: + * + * <template>[arg1][arg2][arg3][arg4] + * + * where there are between 0 and 4 template args, depending on the specific + * template operation. For normal operations, each arg is either a specific + * number of data bytes to add to the output buffer, or an index pointing + * to a previously-written number of data bytes to copy to the output buffer. + * + * The template code is a 5-bit value. This code indicates what to do with + * the following data. Template codes from 0 to 0x19 should use the template + * table, the static "decomp_ops" table used in decompress. For each template + * (table row), there are between 1 and 4 actions; each action corresponds to + * an arg following the template code bits. Each action is either a "data" + * type action, or a "index" type action, and each action results in 2, 4, or 8 + * bytes being written to the output buffer. Each template (i.e. all actions + * in the table row) will add up to 8 bytes being written to the output buffer. + * Any row with less than 4 actions is padded with noop actions, indicated by + * N0 (for which there is no corresponding arg in the compressed data buffer). + * + * "Data" actions, indicated in the table by D2, D4, and D8, mean that the + * corresponding arg is 2, 4, or 8 bytes, respectively, in the compressed data + * buffer should be copied directly to the output buffer. + * + * "Index" actions, indicated in the table by I2, I4, and I8, mean the + * corresponding arg is an index parameter that points to, respectively, a 2, + * 4, or 8 byte value already in the output buffer, that should be copied to + * the end of the output buffer. Essentially, the index points to a position + * in a ring buffer that contains the last N bytes of output buffer data. + * The number of bits for each index's arg are: 8 bits for I2, 9 bits for I4, + * and 8 bits for I8. Since each index points to a 2, 4, or 8 byte section, + * this means that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4 + * can reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference 2048 + * bytes ((2^8 = 256) * 8 bytes). Think of it as a kind-of ring buffer for + * each of I2, I4, and I8 that are updated for each byte written to the output + * buffer. In this implementation, the output buffer is directly used for each + * index; there is no additional memory required. Note that the index is into + * a ring buffer, not a sliding window; for example, if there have been 260 + * bytes written to the output buffer, an I2 index of 0 would index to byte 256 + * in the output buffer, while an I2 index of 16 would index to byte 16 in the + * output buffer. + * + * There are also 3 special template codes; 0x1b for "repeat", 0x1c for + * "zeros", and 0x1e for "end". The "repeat" operation is followed by a 6 bit + * arg N indicating how many times to repeat. The last 8 bytes written to the + * output buffer are written again to the output buffer, N + 1 times. The + * "zeros" operation, which has no arg bits, writes 8 zeros to the output + * buffer. The "end" operation, which also has no arg bits, signals the end + * of the compressed data. There may be some number of padding (don't care, + * but usually 0) bits after the "end" operation bits, to fill the buffer + * length to a specific byte multiple (usually a multiple of 8, 16, or 32 + * bytes). + * + * This software implementation also uses one of the undefined template values, + * 0x1d as a special "short data" template code, to represent less than 8 bytes + * of uncompressed data. It is followed by a 3 bit arg N indicating how many + * data bytes will follow, and then N bytes of data, which should be copied to + * the output buffer. This allows the software 842 compressor to accept input + * buffers that are not an exact multiple of 8 bytes long. However, those + * compressed buffers containing this sw-only template will be rejected by + * the 842 hardware decompressor, and must be decompressed with this software + * library. The 842 software compression module includes a parameter to + * disable using this sw-only "short data" template, and instead simply + * reject any input buffer that is not a multiple of 8 bytes long. + * + * After all actions for each operation code are processed, another template + * code is in the next 5 bits. The decompression ends once the "end" template + * code is detected. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <asm/unaligned.h> + +#include <linux/sw842.h> + +/* special templates */ +#define OP_REPEAT (0x1B) +#define OP_ZEROS (0x1C) +#define OP_END (0x1E) + +/* sw only template - this is not in the hw design; it's used only by this + * software compressor and decompressor, to allow input buffers that aren't + * a multiple of 8. + */ +#define OP_SHORT_DATA (0x1D) + +/* additional bits of each op param */ +#define OP_BITS (5) +#define REPEAT_BITS (6) +#define SHORT_DATA_BITS (3) +#define I2_BITS (8) +#define I4_BITS (9) +#define I8_BITS (8) + +#define REPEAT_BITS_MAX (0x3f) +#define SHORT_DATA_BITS_MAX (0x7) + +/* Arbitrary values used to indicate action */ +#define OP_ACTION (0x70) +#define OP_ACTION_INDEX (0x10) +#define OP_ACTION_DATA (0x20) +#define OP_ACTION_NOOP (0x40) +#define OP_AMOUNT (0x0f) +#define OP_AMOUNT_0 (0x00) +#define OP_AMOUNT_2 (0x02) +#define OP_AMOUNT_4 (0x04) +#define OP_AMOUNT_8 (0x08) + +#define D2 (OP_ACTION_DATA | OP_AMOUNT_2) +#define D4 (OP_ACTION_DATA | OP_AMOUNT_4) +#define D8 (OP_ACTION_DATA | OP_AMOUNT_8) +#define I2 (OP_ACTION_INDEX | OP_AMOUNT_2) +#define I4 (OP_ACTION_INDEX | OP_AMOUNT_4) +#define I8 (OP_ACTION_INDEX | OP_AMOUNT_8) +#define N0 (OP_ACTION_NOOP | OP_AMOUNT_0) + +/* the max of the regular templates - not including the special templates */ +#define OPS_MAX (0x1a) + +#endif diff --git a/lib/842/842_compress.c b/lib/842/842_compress.c new file mode 100644 index 000000000000..7ce68948e68c --- /dev/null +++ b/lib/842/842_compress.c @@ -0,0 +1,626 @@ +/* + * 842 Software Compression + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * See 842.h for details of the 842 compressed format. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define MODULE_NAME "842_compress" + +#include <linux/hashtable.h> + +#include "842.h" +#include "842_debugfs.h" + +#define SW842_HASHTABLE8_BITS (10) +#define SW842_HASHTABLE4_BITS (11) +#define SW842_HASHTABLE2_BITS (10) + +/* By default, we allow compressing input buffers of any length, but we must + * use the non-standard "short data" template so the decompressor can correctly + * reproduce the uncompressed data buffer at the right length. However the + * hardware 842 compressor will not recognize the "short data" template, and + * will fail to decompress any compressed buffer containing it (I have no idea + * why anyone would want to use software to compress and hardware to decompress + * but that's beside the point). This parameter forces the compression + * function to simply reject any input buffer that isn't a multiple of 8 bytes + * long, instead of using the "short data" template, so that all compressed + * buffers produced by this function will be decompressable by the 842 hardware + * decompressor. Unless you have a specific need for that, leave this disabled + * so that any length buffer can be compressed. + */ +static bool sw842_strict; +module_param_named(strict, sw842_strict, bool, 0644); + +static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */ + { I8, N0, N0, N0, 0x19 }, /* 8 */ + { I4, I4, N0, N0, 0x18 }, /* 18 */ + { I4, I2, I2, N0, 0x17 }, /* 25 */ + { I2, I2, I4, N0, 0x13 }, /* 25 */ + { I2, I2, I2, I2, 0x12 }, /* 32 */ + { I4, I2, D2, N0, 0x16 }, /* 33 */ + { I4, D2, I2, N0, 0x15 }, /* 33 */ + { I2, D2, I4, N0, 0x0e }, /* 33 */ + { D2, I2, I4, N0, 0x09 }, /* 33 */ + { I2, I2, I2, D2, 0x11 }, /* 40 */ + { I2, I2, D2, I2, 0x10 }, /* 40 */ + { I2, D2, I2, I2, 0x0d }, /* 40 */ + { D2, I2, I2, I2, 0x08 }, /* 40 */ + { I4, D4, N0, N0, 0x14 }, /* 41 */ + { D4, I4, N0, N0, 0x04 }, /* 41 */ + { I2, I2, D4, N0, 0x0f }, /* 48 */ + { I2, D2, I2, D2, 0x0c }, /* 48 */ + { I2, D4, I2, N0, 0x0b }, /* 48 */ + { D2, I2, I2, D2, 0x07 }, /* 48 */ + { D2, I2, D2, I2, 0x06 }, /* 48 */ + { D4, I2, I2, N0, 0x03 }, /* 48 */ + { I2, D2, D4, N0, 0x0a }, /* 56 */ + { D2, I2, D4, N0, 0x05 }, /* 56 */ + { D4, I2, D2, N0, 0x02 }, /* 56 */ + { D4, D2, I2, N0, 0x01 }, /* 56 */ + { D8, N0, N0, N0, 0x00 }, /* 64 */ +}; + +struct sw842_hlist_node8 { + struct hlist_node node; + u64 data; + u8 index; +}; + +struct sw842_hlist_node4 { + struct hlist_node node; + u32 data; + u16 index; +}; + +struct sw842_hlist_node2 { + struct hlist_node node; + u16 data; + u8 index; +}; + +#define INDEX_NOT_FOUND (-1) +#define INDEX_NOT_CHECKED (-2) + +struct sw842_param { + u8 *in; + u8 *instart; + u64 ilen; + u8 *out; + u64 olen; + u8 bit; + u64 data8[1]; + u32 data4[2]; + u16 data2[4]; + int index8[1]; + int index4[2]; + int index2[4]; + DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS); + DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS); + DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS); + struct sw842_hlist_node8 node8[1 << I8_BITS]; + struct sw842_hlist_node4 node4[1 << I4_BITS]; + struct sw842_hlist_node2 node2[1 << I2_BITS]; +}; + +#define get_input_data(p, o, b) \ + be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o)))) + +#define init_hashtable_nodes(p, b) do { \ + int _i; \ + hash_init((p)->htable##b); \ + for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \ + (p)->node##b[_i].index = _i; \ + (p)->node##b[_i].data = 0; \ + INIT_HLIST_NODE(&(p)->node##b[_i].node); \ + } \ +} while (0) + +#define find_index(p, b, n) ({ \ + struct sw842_hlist_node##b *_n; \ + p->index##b[n] = INDEX_NOT_FOUND; \ + hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \ + if (p->data##b[n] == _n->data) { \ + p->index##b[n] = _n->index; \ + break; \ + } \ + } \ + p->index##b[n] >= 0; \ +}) + +#define check_index(p, b, n) \ + ((p)->index##b[n] == INDEX_NOT_CHECKED \ + ? find_index(p, b, n) \ + : (p)->index##b[n] >= 0) + +#define replace_hash(p, b, i, d) do { \ + struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \ + hash_del(&_n->node); \ + _n->data = (p)->data##b[d]; \ + pr_debug("add hash index%x %x pos %x data %lx\n", b, \ + (unsigned int)_n->index, \ + (unsigned int)((p)->in - (p)->instart), \ + (unsigned long)_n->data); \ + hash_add((p)->htable##b, &_n->node, _n->data); \ +} while (0) + +static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; + +static int add_bits(struct sw842_param *p, u64 d, u8 n); + +static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s) +{ + int ret; + + if (n <= s) + return -EINVAL; + + ret = add_bits(p, d >> s, n - s); + if (ret) + return ret; + return add_bits(p, d & GENMASK_ULL(s - 1, 0), s); +} + +static int add_bits(struct sw842_param *p, u64 d, u8 n) +{ + int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits; + u64 o; + u8 *out = p->out; + + pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d); + + if (n > 64) + return -EINVAL; + + /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0), + * or if we're at the end of the output buffer and would write past end + */ + if (bits > 64) + return __split_add_bits(p, d, n, 32); + else if (p->olen < 8 && bits > 32 && bits <= 56) + return __split_add_bits(p, d, n, 16); + else if (p->olen < 4 && bits > 16 && bits <= 24) + return __split_add_bits(p, d, n, 8); + + if (DIV_ROUND_UP(bits, 8) > p->olen) + return -ENOSPC; + + o = *out & bmask[b]; + d <<= s; + + if (bits <= 8) + *out = o | d; + else if (bits <= 16) + put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out); + else if (bits <= 24) + put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out); + else if (bits <= 32) + put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out); + else if (bits <= 40) + put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out); + else if (bits <= 48) + put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out); + else if (bits <= 56) + put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out); + else + put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out); + + p->bit += n; + + if (p->bit > 7) { + p->out += p->bit / 8; + p->olen -= p->bit / 8; + p->bit %= 8; + } + + return 0; +} + +static int add_template(struct sw842_param *p, u8 c) +{ + int ret, i, b = 0; + u8 *t = comp_ops[c]; + bool inv = false; + + if (c >= OPS_MAX) + return -EINVAL; + + pr_debug("template %x\n", t[4]); + + ret = add_bits(p, t[4], OP_BITS); + if (ret) + return ret; + + for (i = 0; i < 4; i++) { + pr_debug("op %x\n", t[i]); + + switch (t[i] & OP_AMOUNT) { + case OP_AMOUNT_8: + if (b) + inv = true; + else if (t[i] & OP_ACTION_INDEX) + ret = add_bits(p, p->index8[0], I8_BITS); + else if (t[i] & OP_ACTION_DATA) + ret = add_bits(p, p->data8[0], 64); + else + inv = true; + break; + case OP_AMOUNT_4: + if (b == 2 && t[i] & OP_ACTION_DATA) + ret = add_bits(p, get_input_data(p, 2, 32), 32); + else if (b != 0 && b != 4) + inv = true; + else if (t[i] & OP_ACTION_INDEX) + ret = add_bits(p, p->index4[b >> 2], I4_BITS); + else if (t[i] & OP_ACTION_DATA) + ret = add_bits(p, p->data4[b >> 2], 32); + else + inv = true; + break; + case OP_AMOUNT_2: + if (b != 0 && b != 2 && b != 4 && b != 6) + inv = true; + if (t[i] & OP_ACTION_INDEX) + ret = add_bits(p, p->index2[b >> 1], I2_BITS); + else if (t[i] & OP_ACTION_DATA) + ret = add_bits(p, p->data2[b >> 1], 16); + else + inv = true; + break; + case OP_AMOUNT_0: + inv = (b != 8) || !(t[i] & OP_ACTION_NOOP); + break; + default: + inv = true; + break; + } + + if (ret) + return ret; + + if (inv) { + pr_err("Invalid templ %x op %d : %x %x %x %x\n", + c, i, t[0], t[1], t[2], t[3]); + return -EINVAL; + } + + b += t[i] & OP_AMOUNT; + } + + if (b != 8) { + pr_err("Invalid template %x len %x : %x %x %x %x\n", + c, b, t[0], t[1], t[2], t[3]); + return -EINVAL; + } + + if (sw842_template_counts) + atomic_inc(&template_count[t[4]]); + + return 0; +} + +static int add_repeat_template(struct sw842_param *p, u8 r) +{ + int ret; + + /* repeat param is 0-based */ + if (!r || --r > REPEAT_BITS_MAX) + return -EINVAL; + + ret = add_bits(p, OP_REPEAT, OP_BITS); + if (ret) + return ret; + + ret = add_bits(p, r, REPEAT_BITS); + if (ret) + return ret; + + if (sw842_template_counts) + atomic_inc(&template_repeat_count); + + return 0; +} + +static int add_short_data_template(struct sw842_param *p, u8 b) +{ + int ret, i; + + if (!b || b > SHORT_DATA_BITS_MAX) + return -EINVAL; + + ret = add_bits(p, OP_SHORT_DATA, OP_BITS); + if (ret) + return ret; + + ret = add_bits(p, b, SHORT_DATA_BITS); + if (ret) + return ret; + + for (i = 0; i < b; i++) { + ret = add_bits(p, p->in[i], 8); + if (ret) + return ret; + } + + if (sw842_template_counts) + atomic_inc(&template_short_data_count); + + return 0; +} + +static int add_zeros_template(struct sw842_param *p) +{ + int ret = add_bits(p, OP_ZEROS, OP_BITS); + + if (ret) + return ret; + + if (sw842_template_counts) + atomic_inc(&template_zeros_count); + + return 0; +} + +static int add_end_template(struct sw842_param *p) +{ + int ret = add_bits(p, OP_END, OP_BITS); + + if (ret) + return ret; + + if (sw842_template_counts) + atomic_inc(&template_end_count); + + return 0; +} + +static bool check_template(struct sw842_param *p, u8 c) +{ + u8 *t = comp_ops[c]; + int i, match, b = 0; + + if (c >= OPS_MAX) + return false; + + for (i = 0; i < 4; i++) { + if (t[i] & OP_ACTION_INDEX) { + if (t[i] & OP_AMOUNT_2) + match = check_index(p, 2, b >> 1); + else if (t[i] & OP_AMOUNT_4) + match = check_index(p, 4, b >> 2); + else if (t[i] & OP_AMOUNT_8) + match = check_index(p, 8, 0); + else + return false; + if (!match) + return false; + } + + b += t[i] & OP_AMOUNT; + } + + return true; +} + +static void get_next_data(struct sw842_param *p) +{ + p->data8[0] = get_input_data(p, 0, 64); + p->data4[0] = get_input_data(p, 0, 32); + p->data4[1] = get_input_data(p, 4, 32); + p->data2[0] = get_input_data(p, 0, 16); + p->data2[1] = get_input_data(p, 2, 16); + p->data2[2] = get_input_data(p, 4, 16); + p->data2[3] = get_input_data(p, 6, 16); +} + +/* update the hashtable entries. + * only call this after finding/adding the current template + * the dataN fields for the current 8 byte block must be already updated + */ +static void update_hashtables(struct sw842_param *p) +{ + u64 pos = p->in - p->instart; + u64 n8 = (pos >> 3) % (1 << I8_BITS); + u64 n4 = (pos >> 2) % (1 << I4_BITS); + u64 n2 = (pos >> 1) % (1 << I2_BITS); + + replace_hash(p, 8, n8, 0); + replace_hash(p, 4, n4, 0); + replace_hash(p, 4, n4, 1); + replace_hash(p, 2, n2, 0); + replace_hash(p, 2, n2, 1); + replace_hash(p, 2, n2, 2); + replace_hash(p, 2, n2, 3); +} + +/* find the next template to use, and add it + * the p->dataN fields must already be set for the current 8 byte block + */ +static int process_next(struct sw842_param *p) +{ + int ret, i; + + p->index8[0] = INDEX_NOT_CHECKED; + p->index4[0] = INDEX_NOT_CHECKED; + p->index4[1] = INDEX_NOT_CHECKED; + p->index2[0] = INDEX_NOT_CHECKED; + p->index2[1] = INDEX_NOT_CHECKED; + p->index2[2] = INDEX_NOT_CHECKED; + p->index2[3] = INDEX_NOT_CHECKED; + + /* check up to OPS_MAX - 1; last op is our fallback */ + for (i = 0; i < OPS_MAX - 1; i++) { + if (check_template(p, i)) + break; + } + + ret = add_template(p, i); + if (ret) + return ret; + + return 0; +} + +/** + * sw842_compress + * + * Compress the uncompressed buffer of length @ilen at @in to the output buffer + * @out, using no more than @olen bytes, using the 842 compression format. + * + * Returns: 0 on success, error on failure. The @olen parameter + * will contain the number of output bytes written on success, or + * 0 on error. + */ +int sw842_compress(const u8 *in, unsigned int ilen, + u8 *out, unsigned int *olen, void *wmem) +{ + struct sw842_param *p = (struct sw842_param *)wmem; + int ret; + u64 last, next, pad, total; + u8 repeat_count = 0; + + BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS); + + init_hashtable_nodes(p, 8); + init_hashtable_nodes(p, 4); + init_hashtable_nodes(p, 2); + + p->in = (u8 *)in; + p->instart = p->in; + p->ilen = ilen; + p->out = out; + p->olen = *olen; + p->bit = 0; + + total = p->olen; + + *olen = 0; + + /* if using strict mode, we can only compress a multiple of 8 */ + if (sw842_strict && (ilen % 8)) { + pr_err("Using strict mode, can't compress len %d\n", ilen); + return -EINVAL; + } + + /* let's compress at least 8 bytes, mkay? */ + if (unlikely(ilen < 8)) + goto skip_comp; + + /* make initial 'last' different so we don't match the first time */ + last = ~get_unaligned((u64 *)p->in); + + while (p->ilen > 7) { + next = get_unaligned((u64 *)p->in); + + /* must get the next data, as we need to update the hashtable + * entries with the new data every time + */ + get_next_data(p); + + /* we don't care about endianness in last or next; + * we're just comparing 8 bytes to another 8 bytes, + * they're both the same endianness + */ + if (next == last) { + /* repeat count bits are 0-based, so we stop at +1 */ + if (++repeat_count <= REPEAT_BITS_MAX) + goto repeat; + } + if (repeat_count) { + ret = add_repeat_template(p, repeat_count); + repeat_count = 0; + if (next == last) /* reached max repeat bits */ + goto repeat; + } + + if (next == 0) + ret = add_zeros_template(p); + else + ret = process_next(p); + + if (ret) + return ret; + +repeat: + last = next; + update_hashtables(p); + p->in += 8; + p->ilen -= 8; + } + + if (repeat_count) { + ret = add_repeat_template(p, repeat_count); + if (ret) + return ret; + } + +skip_comp: + if (p->ilen > 0) { + ret = add_short_data_template(p, p->ilen); + if (ret) + return ret; + + p->in += p->ilen; + p->ilen = 0; + } + + ret = add_end_template(p); + if (ret) + return ret; + + if (p->bit) { + p->out++; + p->olen--; + p->bit = 0; + } + + /* pad compressed length to multiple of 8 */ + pad = (8 - ((total - p->olen) % 8)) % 8; + if (pad) { + if (pad > p->olen) /* we were so close! */ + return -ENOSPC; + memset(p->out, 0, pad); + p->out += pad; + p->olen -= pad; + } + + if (unlikely((total - p->olen) > UINT_MAX)) + return -ENOSPC; + + *olen = total - p->olen; + + return 0; +} +EXPORT_SYMBOL_GPL(sw842_compress); + +static int __init sw842_init(void) +{ + if (sw842_template_counts) + sw842_debugfs_create(); + + return 0; +} +module_init(sw842_init); + +static void __exit sw842_exit(void) +{ + if (sw842_template_counts) + sw842_debugfs_remove(); +} +module_exit(sw842_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Software 842 Compressor"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); diff --git a/lib/842/842_debugfs.h b/lib/842/842_debugfs.h new file mode 100644 index 000000000000..e7f3bffaf255 --- /dev/null +++ b/lib/842/842_debugfs.h @@ -0,0 +1,52 @@ + +#ifndef __842_DEBUGFS_H__ +#define __842_DEBUGFS_H__ + +#include <linux/debugfs.h> + +static bool sw842_template_counts; +module_param_named(template_counts, sw842_template_counts, bool, 0444); + +static atomic_t template_count[OPS_MAX], template_repeat_count, + template_zeros_count, template_short_data_count, template_end_count; + +static struct dentry *sw842_debugfs_root; + +static int __init sw842_debugfs_create(void) +{ + umode_t m = S_IRUGO | S_IWUSR; + int i; + + if (!debugfs_initialized()) + return -ENODEV; + + sw842_debugfs_root = debugfs_create_dir(MODULE_NAME, NULL); + if (IS_ERR(sw842_debugfs_root)) + return PTR_ERR(sw842_debugfs_root); + + for (i = 0; i < ARRAY_SIZE(template_count); i++) { + char name[32]; + + snprintf(name, 32, "template_%02x", i); + debugfs_create_atomic_t(name, m, sw842_debugfs_root, + &template_count[i]); + } + debugfs_create_atomic_t("template_repeat", m, sw842_debugfs_root, + &template_repeat_count); + debugfs_create_atomic_t("template_zeros", m, sw842_debugfs_root, + &template_zeros_count); + debugfs_create_atomic_t("template_short_data", m, sw842_debugfs_root, + &template_short_data_count); + debugfs_create_atomic_t("template_end", m, sw842_debugfs_root, + &template_end_count); + + return 0; +} + +static void __exit sw842_debugfs_remove(void) +{ + if (sw842_debugfs_root && !IS_ERR(sw842_debugfs_root)) + debugfs_remove_recursive(sw842_debugfs_root); +} + +#endif diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c new file mode 100644 index 000000000000..5446ff0c9ba0 --- /dev/null +++ b/lib/842/842_decompress.c @@ -0,0 +1,405 @@ +/* + * 842 Software Decompression + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * See 842.h for details of the 842 compressed format. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define MODULE_NAME "842_decompress" + +#include "842.h" +#include "842_debugfs.h" + +/* rolling fifo sizes */ +#define I2_FIFO_SIZE (2 * (1 << I2_BITS)) +#define I4_FIFO_SIZE (4 * (1 << I4_BITS)) +#define I8_FIFO_SIZE (8 * (1 << I8_BITS)) + +static u8 decomp_ops[OPS_MAX][4] = { + { D8, N0, N0, N0 }, + { D4, D2, I2, N0 }, + { D4, I2, D2, N0 }, + { D4, I2, I2, N0 }, + { D4, I4, N0, N0 }, + { D2, I2, D4, N0 }, + { D2, I2, D2, I2 }, + { D2, I2, I2, D2 }, + { D2, I2, I2, I2 }, + { D2, I2, I4, N0 }, + { I2, D2, D4, N0 }, + { I2, D4, I2, N0 }, + { I2, D2, I2, D2 }, + { I2, D2, I2, I2 }, + { I2, D2, I4, N0 }, + { I2, I2, D4, N0 }, + { I2, I2, D2, I2 }, + { I2, I2, I2, D2 }, + { I2, I2, I2, I2 }, + { I2, I2, I4, N0 }, + { I4, D4, N0, N0 }, + { I4, D2, I2, N0 }, + { I4, I2, D2, N0 }, + { I4, I2, I2, N0 }, + { I4, I4, N0, N0 }, + { I8, N0, N0, N0 } +}; + +struct sw842_param { + u8 *in; + u8 bit; + u64 ilen; + u8 *out; + u8 *ostart; + u64 olen; +}; + +#define beN_to_cpu(d, s) \ + ((s) == 2 ? be16_to_cpu(get_unaligned((__be16 *)d)) : \ + (s) == 4 ? be32_to_cpu(get_unaligned((__be32 *)d)) : \ + (s) == 8 ? be64_to_cpu(get_unaligned((__be64 *)d)) : \ + WARN(1, "pr_debug param err invalid size %x\n", s)) + +static int next_bits(struct sw842_param *p, u64 *d, u8 n); + +static int __split_next_bits(struct sw842_param *p, u64 *d, u8 n, u8 s) +{ + u64 tmp = 0; + int ret; + + if (n <= s) { + pr_debug("split_next_bits invalid n %u s %u\n", n, s); + return -EINVAL; + } + + ret = next_bits(p, &tmp, n - s); + if (ret) + return ret; + ret = next_bits(p, d, s); + if (ret) + return ret; + *d |= tmp << s; + return 0; +} + +static int next_bits(struct sw842_param *p, u64 *d, u8 n) +{ + u8 *in = p->in, b = p->bit, bits = b + n; + + if (n > 64) { + pr_debug("next_bits invalid n %u\n", n); + return -EINVAL; + } + + /* split this up if reading > 8 bytes, or if we're at the end of + * the input buffer and would read past the end + */ + if (bits > 64) + return __split_next_bits(p, d, n, 32); + else if (p->ilen < 8 && bits > 32 && bits <= 56) + return __split_next_bits(p, d, n, 16); + else if (p->ilen < 4 && bits > 16 && bits <= 24) + return __split_next_bits(p, d, n, 8); + + if (DIV_ROUND_UP(bits, 8) > p->ilen) + return -EOVERFLOW; + + if (bits <= 8) + *d = *in >> (8 - bits); + else if (bits <= 16) + *d = be16_to_cpu(get_unaligned((__be16 *)in)) >> (16 - bits); + else if (bits <= 32) + *d = be32_to_cpu(get_unaligned((__be32 *)in)) >> (32 - bits); + else + *d = be64_to_cpu(get_unaligned((__be64 *)in)) >> (64 - bits); + + *d &= GENMASK_ULL(n - 1, 0); + + p->bit += n; + + if (p->bit > 7) { + p->in += p->bit / 8; + p->ilen -= p->bit / 8; + p->bit %= 8; + } + + return 0; +} + +static int do_data(struct sw842_param *p, u8 n) +{ + u64 v; + int ret; + + if (n > p->olen) + return -ENOSPC; + + ret = next_bits(p, &v, n * 8); + if (ret) + return ret; + + switch (n) { + case 2: + put_unaligned(cpu_to_be16((u16)v), (__be16 *)p->out); + break; + case 4: + put_unaligned(cpu_to_be32((u32)v), (__be32 *)p->out); + break; + case 8: + put_unaligned(cpu_to_be64((u64)v), (__be64 *)p->out); + break; + default: + return -EINVAL; + } + + p->out += n; + p->olen -= n; + + return 0; +} + +static int __do_index(struct sw842_param *p, u8 size, u8 bits, u64 fsize) +{ + u64 index, offset, total = round_down(p->out - p->ostart, 8); + int ret; + + ret = next_bits(p, &index, bits); + if (ret) + return ret; + + offset = index * size; + + /* a ring buffer of fsize is used; correct the offset */ + if (total > fsize) { + /* this is where the current fifo is */ + u64 section = round_down(total, fsize); + /* the current pos in the fifo */ + u64 pos = total - section; + + /* if the offset is past/at the pos, we need to + * go back to the last fifo section + */ + if (offset >= pos) + section -= fsize; + + offset += section; + } + + if (offset + size > total) { + pr_debug("index%x %lx points past end %lx\n", size, + (unsigned long)offset, (unsigned long)total); + return -EINVAL; + } + + pr_debug("index%x to %lx off %lx adjoff %lx tot %lx data %lx\n", + size, (unsigned long)index, (unsigned long)(index * size), + (unsigned long)offset, (unsigned long)total, + (unsigned long)beN_to_cpu(&p->ostart[offset], size)); + + memcpy(p->out, &p->ostart[offset], size); + p->out += size; + p->olen -= size; + + return 0; +} + +static int do_index(struct sw842_param *p, u8 n) +{ + switch (n) { + case 2: + return __do_index(p, 2, I2_BITS, I2_FIFO_SIZE); + case 4: + return __do_index(p, 4, I4_BITS, I4_FIFO_SIZE); + case 8: + return __do_index(p, 8, I8_BITS, I8_FIFO_SIZE); + default: + return -EINVAL; + } +} + +static int do_op(struct sw842_param *p, u8 o) +{ + int i, ret = 0; + + if (o >= OPS_MAX) + return -EINVAL; + + for (i = 0; i < 4; i++) { + u8 op = decomp_ops[o][i]; + + pr_debug("op is %x\n", op); + + switch (op & OP_ACTION) { + case OP_ACTION_DATA: + ret = do_data(p, op & OP_AMOUNT); + break; + case OP_ACTION_INDEX: + ret = do_index(p, op & OP_AMOUNT); + break; + case OP_ACTION_NOOP: + break; + default: + pr_err("Interal error, invalid op %x\n", op); + return -EINVAL; + } + + if (ret) + return ret; + } + + if (sw842_template_counts) + atomic_inc(&template_count[o]); + + return 0; +} + +/** + * sw842_decompress + * + * Decompress the 842-compressed buffer of length @ilen at @in + * to the output buffer @out, using no more than @olen bytes. + * + * The compressed buffer must be only a single 842-compressed buffer, + * with the standard format described in the comments in 842.h + * Processing will stop when the 842 "END" template is detected, + * not the end of the buffer. + * + * Returns: 0 on success, error on failure. The @olen parameter + * will contain the number of output bytes written on success, or + * 0 on error. + */ +int sw842_decompress(const u8 *in, unsigned int ilen, + u8 *out, unsigned int *olen) +{ + struct sw842_param p; + int ret; + u64 op, rep, tmp, bytes, total; + + p.in = (u8 *)in; + p.bit = 0; + p.ilen = ilen; + p.out = out; + p.ostart = out; + p.olen = *olen; + + total = p.olen; + + *olen = 0; + + do { + ret = next_bits(&p, &op, OP_BITS); + if (ret) + return ret; + + pr_debug("template is %lx\n", (unsigned long)op); + + switch (op) { + case OP_REPEAT: + ret = next_bits(&p, &rep, REPEAT_BITS); + if (ret) + return ret; + + if (p.out == out) /* no previous bytes */ + return -EINVAL; + + /* copy rep + 1 */ + rep++; + + if (rep * 8 > p.olen) + return -ENOSPC; + + while (rep-- > 0) { + memcpy(p.out, p.out - 8, 8); + p.out += 8; + p.olen -= 8; + } + + if (sw842_template_counts) + atomic_inc(&template_repeat_count); + + break; + case OP_ZEROS: + if (8 > p.olen) + return -ENOSPC; + + memset(p.out, 0, 8); + p.out += 8; + p.olen -= 8; + + if (sw842_template_counts) + atomic_inc(&template_zeros_count); + + break; + case OP_SHORT_DATA: + ret = next_bits(&p, &bytes, SHORT_DATA_BITS); + if (ret) + return ret; + + if (!bytes || bytes > SHORT_DATA_BITS_MAX) + return -EINVAL; + + while (bytes-- > 0) { + ret = next_bits(&p, &tmp, 8); + if (ret) + return ret; + *p.out = (u8)tmp; + p.out++; + p.olen--; + } + + if (sw842_template_counts) + atomic_inc(&template_short_data_count); + + break; + case OP_END: + if (sw842_template_counts) + atomic_inc(&template_end_count); + + break; + default: /* use template */ + ret = do_op(&p, op); + if (ret) + return ret; + break; + } + } while (op != OP_END); + + if (unlikely((total - p.olen) > UINT_MAX)) + return -ENOSPC; + + *olen = total - p.olen; + + return 0; +} +EXPORT_SYMBOL_GPL(sw842_decompress); + +static int __init sw842_init(void) +{ + if (sw842_template_counts) + sw842_debugfs_create(); + + return 0; +} +module_init(sw842_init); + +static void __exit sw842_exit(void) +{ + if (sw842_template_counts) + sw842_debugfs_remove(); +} +module_exit(sw842_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Software 842 Decompressor"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); diff --git a/lib/842/Makefile b/lib/842/Makefile new file mode 100644 index 000000000000..5d24c0baff2e --- /dev/null +++ b/lib/842/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_842_COMPRESS) += 842_compress.o +obj-$(CONFIG_842_DECOMPRESS) += 842_decompress.o diff --git a/lib/Kconfig b/lib/Kconfig index 54cf309a92a5..34e332b8d326 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -13,8 +13,16 @@ config RAID6_PQ config BITREVERSE tristate +config HAVE_ARCH_BITREVERSE + bool + default n + depends on BITREVERSE + help + This option enables the use of hardware bit-reversal instructions on + architectures which support such operations. + config RATIONAL - boolean + bool config GENERIC_STRNCPY_FROM_USER bool @@ -39,14 +47,14 @@ config GENERIC_IOMAP select GENERIC_PCI_IOMAP config GENERIC_IO - boolean + bool default n config STMP_DEVICE bool config PERCPU_RWSEM - boolean + bool config ARCH_USE_CMPXCHG_LOCKREF bool @@ -204,6 +212,12 @@ config RANDOM32_SELFTEST # # compression support is select'ed if needed # +config 842_COMPRESS + tristate + +config 842_DECOMPRESS + tristate + config ZLIB_INFLATE tristate @@ -257,7 +271,7 @@ config DECOMPRESS_LZ4 # Generic allocator support is selected if needed # config GENERIC_ALLOCATOR - boolean + bool # # reed solomon support is select'ed if needed @@ -266,16 +280,16 @@ config REED_SOLOMON tristate config REED_SOLOMON_ENC8 - boolean + bool config REED_SOLOMON_DEC8 - boolean + bool config REED_SOLOMON_ENC16 - boolean + bool config REED_SOLOMON_DEC16 - boolean + bool # # BCH support is selected if needed @@ -284,7 +298,7 @@ config BCH tristate config BCH_CONST_PARAMS - boolean + bool help Drivers may select this option to force specific constant values for parameters 'm' (Galois field order) and 't' @@ -320,7 +334,7 @@ config BCH_CONST_T # Textsearch support is select'ed if needed # config TEXTSEARCH - boolean + bool config TEXTSEARCH_KMP tristate @@ -332,10 +346,10 @@ config TEXTSEARCH_FSM tristate config BTREE - boolean + bool config INTERVAL_TREE - boolean + bool help Simple, embeddable, interval-tree. Can find the start of an overlapping range in log(n) time and then iterate over all @@ -363,18 +377,18 @@ config ASSOCIATIVE_ARRAY for more information. config HAS_IOMEM - boolean + bool depends on !NO_IOMEM select GENERIC_IO default y config HAS_IOPORT_MAP - boolean + bool depends on HAS_IOMEM && !NO_IOPORT_MAP default y config HAS_DMA - boolean + bool depends on !NO_DMA default y @@ -388,10 +402,6 @@ config CPUMASK_OFFSTACK them on the stack. This is a bit more expensive, but avoids stack overflow. -config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS - bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS - depends on BROKEN - config CPU_RMAP bool depends on SMP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cfe8ca6717c2..b908048f8d6a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -167,6 +167,17 @@ config DEBUG_INFO_DWARF4 But it significantly improves the success of resolving variables in gdb on optimized code. +config GDB_SCRIPTS + bool "Provide GDB scripts for kernel debugging" + depends on DEBUG_INFO + help + This creates the required links to GDB helper scripts in the + build directory. If you load vmlinux into gdb, the helper + scripts will be automatically imported by gdb as well, and + additional functions are available to analyze a Linux kernel + instance. See Documentation/gdb-kernel-debugging.txt for further + details. + config ENABLE_WARN_DEPRECATED bool "Enable __deprecated logic" default y @@ -227,6 +238,22 @@ config UNUSED_SYMBOLS you really need it, and what the merge plan to the mainline kernel for your module is. +config PAGE_OWNER + bool "Track page owner" + depends on DEBUG_KERNEL && STACKTRACE_SUPPORT + select DEBUG_FS + select STACKTRACE + select PAGE_EXTENSION + help + This keeps track of what call chain is the owner of a page, may + help to find bare alloc_page(s) leaks. Even if you include this + feature on your build, it is disabled in default. You should pass + "page_owner=on" to boot parameter in order to enable it. Eats + a fair amount of memory if enabled. See tools/vm/page_owner_sort.c + for user-space helper. + + If unsure, say N. + config DEBUG_FS bool "Debug Filesystem" help @@ -635,6 +662,8 @@ config DEBUG_STACKOVERFLOW source "lib/Kconfig.kmemcheck" +source "lib/Kconfig.kasan" + endmenu # "Memory Debugging" config DEBUG_SHIRQ @@ -836,6 +865,19 @@ config SCHED_STACK_END_CHECK data corruption or a sporadic crash at a later stage once the region is examined. The runtime overhead introduced is minimal. +config DEBUG_TIMEKEEPING + bool "Enable extra timekeeping sanity checking" + help + This option will enable additional timekeeping sanity checks + which may be helpful when diagnosing issues where timekeeping + problems are suspected. + + This may include checks in the timekeeping hotpaths, so this + option may have a (very small) performance impact to some + workloads. + + If unsure, say N. + config TIMER_STATS bool "Collect kernel timers statistics" depends on DEBUG_KERNEL && PROC_FS @@ -1151,16 +1193,7 @@ config DEBUG_CREDENTIALS menu "RCU Debugging" config PROVE_RCU - bool "RCU debugging: prove RCU correctness" - depends on PROVE_LOCKING - default n - help - This feature enables lockdep extensions that check for correct - use of RCU APIs. This is currently under development. Say Y - if you want to debug RCU usage or help work on the PROVE_RCU - feature. - - Say N if you are unsure. + def_bool PROVE_LOCKING config PROVE_RCU_REPEATEDLY bool "RCU debugging: don't disable PROVE_RCU on first splat" @@ -1199,6 +1232,8 @@ config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL select TORTURE_TEST + select SRCU + select TASKS_RCU default n help This option provides a kernel module that runs torture tests @@ -1227,6 +1262,81 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. +config RCU_TORTURE_TEST_SLOW_PREINIT + bool "Slow down RCU grace-period pre-initialization to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period pre-initialization (the + propagation of CPU-hotplug changes up the rcu_node combining + tree) for a few jiffies between initializing each pair of + consecutive rcu_node structures. This helps to expose races + involving grace-period pre-initialization, in other words, it + makes your kernel less stable. It can also greatly increase + grace-period latency, especially on systems with large numbers + of CPUs. This is useful when torture-testing RCU, but in + almost no other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY + int "How much to slow down RCU grace-period pre-initialization" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_PREINIT + help + This option specifies the number of jiffies to wait between + each rcu_node structure pre-initialization step. + +config RCU_TORTURE_TEST_SLOW_INIT + bool "Slow down RCU grace-period initialization to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period initialization for a few + jiffies between initializing each pair of consecutive + rcu_node structures. This helps to expose races involving + grace-period initialization, in other words, it makes your + kernel less stable. It can also greatly increase grace-period + latency, especially on systems with large numbers of CPUs. + This is useful when torture-testing RCU, but in almost no + other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_INIT_DELAY + int "How much to slow down RCU grace-period initialization" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_INIT + help + This option specifies the number of jiffies to wait between + each rcu_node structure initialization. + +config RCU_TORTURE_TEST_SLOW_CLEANUP + bool "Slow down RCU grace-period cleanup to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period cleanup for a few jiffies + between cleaning up each pair of consecutive rcu_node + structures. This helps to expose races involving grace-period + cleanup, in other words, it makes your kernel less stable. + It can also greatly increase grace-period latency, especially + on systems with large numbers of CPUs. This is useful when + torture-testing RCU, but in almost no other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY + int "How much to slow down RCU grace-period cleanup" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_CLEANUP + help + This option specifies the number of jiffies to wait between + each rcu_node structure cleanup operation. + config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON @@ -1238,22 +1348,10 @@ config RCU_CPU_STALL_TIMEOUT RCU grace period persists, additional CPU stall warnings are printed at more widely spaced intervals. -config RCU_CPU_STALL_VERBOSE - bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" - depends on TREE_PREEMPT_RCU - default y - help - This option causes RCU to printk detailed per-task information - for any tasks that are stalling the current RCU grace period. - - Say N if you are unsure. - - Say Y if you want to enable such checks. - config RCU_CPU_STALL_INFO bool "Print additional diagnostics on RCU CPU stall" - depends on (TREE_RCU || TREE_PREEMPT_RCU) && DEBUG_KERNEL - default n + depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL + default y help For each stalled CPU that is aware of the current RCU grace period, print out additional per-CPU diagnostic information @@ -1275,6 +1373,17 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing Say N if you are unsure. +config RCU_EQS_DEBUG + bool "Use this when adding any sort of NO_HZ support to your arch" + depends on DEBUG_KERNEL + help + This option provides consistency checks in RCU's handling of + NO_HZ. These checks have proven quite helpful in detecting + bugs in arch-specific NO_HZ code. + + Say N here if you need ultimate kernel/user switch latencies + Say Y if you are unsure + endmenu # "RCU Debugging" config DEBUG_BLOCK_EXT_DEVT @@ -1575,6 +1684,9 @@ config ASYNC_RAID6_TEST If unsure, say N. +config TEST_HEXDUMP + tristate "Test functions located in the hexdump module at runtime" + config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" @@ -1582,7 +1694,7 @@ config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" config TEST_RHASHTABLE - bool "Perform selftest on resizable hash table" + tristate "Perform selftest on resizable hash table" default n help Enable this option to test the rhashtable functions at boot. @@ -1711,6 +1823,18 @@ config TEST_UDELAY If unsure, say N. +config MEMTEST + bool "Memtest" + depends on HAVE_MEMBLOCK + ---help--- + This option adds a kernel parameter 'memtest', which allows memtest + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=17, mean do 17 test patterns. + If you are unsure how to answer this question, answer N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan new file mode 100644 index 000000000000..777eda7d1ab4 --- /dev/null +++ b/lib/Kconfig.kasan @@ -0,0 +1,58 @@ +config HAVE_ARCH_KASAN + bool + +if HAVE_ARCH_KASAN + +config KASAN + bool "KASan: runtime memory debugger" + depends on SLUB_DEBUG + select CONSTRUCTORS + help + Enables kernel address sanitizer - runtime memory debugger, + designed to find out-of-bounds accesses and use-after-free bugs. + This is strictly a debugging feature and it requires a gcc version + of 4.9.2 or later. Detection of out of bounds accesses to stack or + global variables requires gcc 5.0 or later. + This feature consumes about 1/8 of available memory and brings about + ~x3 performance slowdown. + For better error detection enable CONFIG_STACKTRACE, + and add slub_debug=U to boot cmdline. + +config KASAN_SHADOW_OFFSET + hex + default 0xdffffc0000000000 if X86_64 + +choice + prompt "Instrumentation type" + depends on KASAN + default KASAN_OUTLINE + +config KASAN_OUTLINE + bool "Outline instrumentation" + help + Before every memory access compiler insert function call + __asan_load*/__asan_store*. These functions performs check + of shadow memory. This is slower than inline instrumentation, + however it doesn't bloat size of kernel's .text section so + much as inline does. + +config KASAN_INLINE + bool "Inline instrumentation" + help + Compiler directly inserts code checking shadow memory before + memory accesses. This is faster than outline (in some workloads + it gives about x2 boost over outline instrumentation), but + make kernel's .text size much bigger. + This requires a gcc version of 5.0 or later. + +endchoice + +config TEST_KASAN + tristate "Module for testing kasan for bug detection" + depends on m && KASAN + help + This is a test module doing various nasty things like + out of bounds accesses, use after free. It is useful for testing + kernel debugging features like kernel address sanitizer. + +endif diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 358eb81fa28d..c635a107a7de 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -73,6 +73,31 @@ config KGDB_KDB help KDB frontend for kernel +config KDB_DEFAULT_ENABLE + hex "KDB: Select kdb command functions to be enabled by default" + depends on KGDB_KDB + default 0x1 + help + Specifiers which kdb commands are enabled by default. This may + be set to 1 or 0 to enable all commands or disable almost all + commands. + + Alternatively the following bitmask applies: + + 0x0002 - allow arbitrary reads from memory and symbol lookup + 0x0004 - allow arbitrary writes to memory + 0x0008 - allow current register state to be inspected + 0x0010 - allow current register state to be modified + 0x0020 - allow passive inspection (backtrace, process list, lsmod) + 0x0040 - allow flow control management (breakpoint, single step) + 0x0080 - enable signalling of processes + 0x0100 - allow machine to be rebooted + + The config option merely sets the default at boot time. Both + issuing 'echo X > /sys/module/kdb/parameters/cmd_enable' or + setting with kdb.cmd_enable=X kernel command line option will + override the default settings. + config KDB_KEYBOARD bool "KGDB_KDB: keyboard as input device" depends on VT && KGDB_KDB diff --git a/lib/Makefile b/lib/Makefile index 7512dc978f18..ff37c8c2f7b2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -4,16 +4,16 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ - sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ + sha1.o md5.o irq_regs.o argv_split.o \ proportions.o flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o + earlycpio.o seq_buf.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -23,18 +23,22 @@ lib-y += kobject.o klist.o obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ - bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o rhashtable.o + bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ + bsearch.o find_bit.o llist.o memweight.o kfifo.o \ + percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o +obj-y += hexdump.o +obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o obj-y += kstrtox.o +obj-$(CONFIG_TEST_BPF) += test_bpf.o +obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o +obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LKM) += test_module.o +obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o -obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -74,6 +78,8 @@ obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o +obj-$(CONFIG_842_COMPRESS) += 842/ +obj-$(CONFIG_842_DECOMPRESS) += 842/ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ @@ -102,7 +108,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 2404d03e251a..03dd576e6773 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -11,6 +11,7 @@ * 2 of the Licence, or (at your option) any later version. */ //#define DEBUG +#include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/assoc_array_priv.h> diff --git a/lib/audit.c b/lib/audit.c index 1d726a22565b..b8fb5ee81e26 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -54,6 +54,9 @@ int audit_classify_syscall(int abi, unsigned syscall) case __NR_socketcall: return 4; #endif +#ifdef __NR_execveat + case __NR_execveat: +#endif case __NR_execve: return 5; default: diff --git a/lib/bitmap.c b/lib/bitmap.c index b499ab6ada29..64c0926f5dd8 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -12,6 +12,8 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/bug.h> + +#include <asm/page.h> #include <asm/uaccess.h> /* @@ -40,36 +42,6 @@ * for the best explanations of this ordering. */ -int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) -{ - unsigned int k, lim = bits/BITS_PER_LONG; - for (k = 0; k < lim; ++k) - if (bitmap[k]) - return 0; - - if (bits % BITS_PER_LONG) - if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) - return 0; - - return 1; -} -EXPORT_SYMBOL(__bitmap_empty); - -int __bitmap_full(const unsigned long *bitmap, unsigned int bits) -{ - unsigned int k, lim = bits/BITS_PER_LONG; - for (k = 0; k < lim; ++k) - if (~bitmap[k]) - return 0; - - if (bits % BITS_PER_LONG) - if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) - return 0; - - return 1; -} -EXPORT_SYMBOL(__bitmap_full); - int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { @@ -102,18 +74,18 @@ EXPORT_SYMBOL(__bitmap_complement); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting right (dividing) means moving bits in the MS -> LS bit * direction. Zeros are fed into the vacated MS positions and the * LS bits shifted off the bottom are lost. */ -void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned shift, unsigned nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; - unsigned long mask = (1UL << left) - 1; + unsigned k, lim = BITS_TO_LONGS(nbits); + unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); for (k = 0; off + k < lim; ++k) { unsigned long upper, lower; @@ -125,17 +97,15 @@ void __bitmap_shift_right(unsigned long *dst, upper = 0; else { upper = src[off + k + 1]; - if (off + k + 1 == lim - 1 && left) + if (off + k + 1 == lim - 1) upper &= mask; + upper <<= (BITS_PER_LONG - rem); } lower = src[off + k]; - if (left && off + k == lim - 1) + if (off + k == lim - 1) lower &= mask; - dst[k] = lower >> rem; - if (rem) - dst[k] |= upper << (BITS_PER_LONG - rem); - if (left && k == lim - 1) - dst[k] &= mask; + lower >>= rem; + dst[k] = lower | upper; } if (off) memset(&dst[lim - off], 0, off*sizeof(unsigned long)); @@ -148,18 +118,19 @@ EXPORT_SYMBOL(__bitmap_shift_right); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting left (multiplying) means moving bits in the LS -> MS * direction. Zeros are fed into the vacated LS bit positions * and those MS bits shifted off the top are lost. */ -void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + int k; + unsigned int lim = BITS_TO_LONGS(nbits); + unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; for (k = lim - off - 1; k >= 0; --k) { unsigned long upper, lower; @@ -168,17 +139,11 @@ void __bitmap_shift_left(unsigned long *dst, * word below and make them the bottom rem bits of result. */ if (rem && k > 0) - lower = src[k - 1]; + lower = src[k - 1] >> (BITS_PER_LONG - rem); else lower = 0; - upper = src[k]; - if (left && k == lim - 1) - upper &= (1UL << left) - 1; - dst[k + off] = upper << rem; - if (rem) - dst[k + off] |= lower >> (BITS_PER_LONG - rem); - if (left && k + off == lim - 1) - dst[k + off] &= (1UL << left) - 1; + upper = src[k] << rem; + dst[k + off] = lower | upper; } if (off) memset(dst, 0, off*sizeof(unsigned long)); @@ -326,30 +291,32 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len) } EXPORT_SYMBOL(bitmap_clear); -/* - * bitmap_find_next_zero_area - find a contiguous aligned zero area +/** + * bitmap_find_next_zero_area_off - find a contiguous aligned zero area * @map: The address to base the search on * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for * @align_mask: Alignment mask for zero area + * @align_offset: Alignment offset for zero area. * * The @align_mask should be one less than a power of 2; the effect is that - * the bit offset of all zero areas this function finds is multiples of that - * power of 2. A @align_mask of 0 means no alignment is required. + * the bit offset of all zero areas this function finds plus @align_offset + * is multiple of that power of 2. */ -unsigned long bitmap_find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) +unsigned long bitmap_find_next_zero_area_off(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask, + unsigned long align_offset) { unsigned long index, end, i; again: index = find_next_zero_bit(map, size, start); /* Align allocation */ - index = __ALIGN_MASK(index, align_mask); + index = __ALIGN_MASK(index + align_offset, align_mask) - align_offset; end = index + nr; if (end > size) @@ -361,7 +328,7 @@ again: } return index; } -EXPORT_SYMBOL(bitmap_find_next_zero_area); +EXPORT_SYMBOL(bitmap_find_next_zero_area_off); /* * Bitmap printing & parsing functions: first version by Nadia Yvette Chambers, @@ -373,45 +340,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area); #define BASEDEC 10 /* fancier cpuset lists input in decimal */ /** - * bitmap_scnprintf - convert bitmap to an ASCII hex string. - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Exactly @nmaskbits bits are displayed. Hex digits are grouped into - * comma-separated sets of eight digits per set. Returns the number of - * characters which were written to *buf, excluding the trailing \0. - */ -int bitmap_scnprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - int i, word, bit, len = 0; - unsigned long val; - const char *sep = ""; - int chunksz; - u32 chunkmask; - - chunksz = nmaskbits & (CHUNKSZ - 1); - if (chunksz == 0) - chunksz = CHUNKSZ; - - i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; - for (; i >= 0; i -= CHUNKSZ) { - chunkmask = ((1ULL << chunksz) - 1); - word = i / BITS_PER_LONG; - bit = i % BITS_PER_LONG; - val = (maskp[word] >> bit) & chunkmask; - len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep, - (chunksz+3)/4, val); - chunksz = CHUNKSZ; - sep = ","; - } - return len; -} -EXPORT_SYMBOL(bitmap_scnprintf); - -/** * __bitmap_parse - convert an ASCII hex string into a bitmap. * @buf: pointer to buffer containing string. * @buflen: buffer size in bytes. If string is smaller than this @@ -524,64 +452,32 @@ int bitmap_parse_user(const char __user *ubuf, } EXPORT_SYMBOL(bitmap_parse_user); -/* - * bscnl_emit(buf, buflen, rbot, rtop, bp) - * - * Helper routine for bitmap_scnlistprintf(). Write decimal number - * or range to buf, suppressing output past buf+buflen, with optional - * comma-prefix. Return len of what was written to *buf, excluding the - * trailing \0. - */ -static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) -{ - if (len > 0) - len += scnprintf(buf + len, buflen - len, ","); - if (rbot == rtop) - len += scnprintf(buf + len, buflen - len, "%d", rbot); - else - len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop); - return len; -} - /** - * bitmap_scnlistprintf - convert bitmap to list format ASCII string - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes + * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string + * @list: indicates whether the bitmap must be list + * @buf: page aligned buffer into which string is placed * @maskp: pointer to bitmap to convert * @nmaskbits: size of bitmap, in bits * * Output format is a comma-separated list of decimal numbers and - * ranges. Consecutively set bits are shown as two hyphen-separated - * decimal numbers, the smallest and largest bit numbers set in - * the range. Output format is compatible with the format - * accepted as input by bitmap_parselist(). - * - * The return value is the number of characters which were written to *buf - * excluding the trailing '\0', as per ISO C99's scnprintf. + * ranges if list is specified or hex digits grouped into comma-separated + * sets of 8 digits/set. Returns the number of characters written to buf. */ -int bitmap_scnlistprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, + int nmaskbits) { - int len = 0; - /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; - - if (buflen == 0) - return 0; - buf[0] = 0; - - rbot = cur = find_first_bit(maskp, nmaskbits); - while (cur < nmaskbits) { - rtop = cur; - cur = find_next_bit(maskp, nmaskbits, cur+1); - if (cur >= nmaskbits || cur > rtop + 1) { - len = bscnl_emit(buf, buflen, rbot, rtop, len); - rbot = cur; - } + ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf - 2; + int n = 0; + + if (len > 1) { + n = list ? scnprintf(buf, len, "%*pbl", nmaskbits, maskp) : + scnprintf(buf, len, "%*pb", nmaskbits, maskp); + buf[n++] = '\n'; + buf[n] = '\0'; } - return len; + return n; } -EXPORT_SYMBOL(bitmap_scnlistprintf); +EXPORT_SYMBOL(bitmap_print_to_pagebuf); /** * __bitmap_parselist - convert list format ASCII string to bitmap @@ -713,10 +609,10 @@ EXPORT_SYMBOL(bitmap_parselist_user); /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap - * @pos: a bit position in @buf (0 <= @pos < @bits) - * @bits: number of valid bit positions in @buf + * @pos: a bit position in @buf (0 <= @pos < @nbits) + * @nbits: number of valid bit positions in @buf * - * Map the bit at position @pos in @buf (of length @bits) to the + * Map the bit at position @pos in @buf (of length @nbits) to the * ordinal of which set bit it is. If it is not set or if @pos * is not a valid bit position, map to -1. * @@ -728,56 +624,40 @@ EXPORT_SYMBOL(bitmap_parselist_user); * * The bit positions 0 through @bits are valid positions in @buf. */ -static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) +static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigned int nbits) { - int i, ord; - - if (pos < 0 || pos >= bits || !test_bit(pos, buf)) + if (pos >= nbits || !test_bit(pos, buf)) return -1; - i = find_first_bit(buf, bits); - ord = 0; - while (i < pos) { - i = find_next_bit(buf, bits, i + 1); - ord++; - } - BUG_ON(i != pos); - - return ord; + return __bitmap_weight(buf, pos); } /** * bitmap_ord_to_pos - find position of n-th set bit in bitmap * @buf: pointer to bitmap * @ord: ordinal bit position (n-th set bit, n >= 0) - * @bits: number of valid bit positions in @buf + * @nbits: number of valid bit positions in @buf * * Map the ordinal offset of bit @ord in @buf to its position in @buf. - * Value of @ord should be in range 0 <= @ord < weight(buf), else - * results are undefined. + * Value of @ord should be in range 0 <= @ord < weight(buf). If @ord + * >= weight(buf), returns @nbits. * * If for example, just bits 4 through 7 are set in @buf, then @ord * values 0 through 3 will get mapped to 4 through 7, respectively, - * and all other @ord values return undefined values. When @ord value 3 + * and all other @ord values returns @nbits. When @ord value 3 * gets mapped to (returns) @pos value 7 in this example, that means * that the 3rd set bit (starting with 0th) is at position 7 in @buf. * - * The bit positions 0 through @bits are valid positions in @buf. + * The bit positions 0 through @nbits-1 are valid positions in @buf. */ -int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +unsigned int bitmap_ord_to_pos(const unsigned long *buf, unsigned int ord, unsigned int nbits) { - int pos = 0; + unsigned int pos; - if (ord >= 0 && ord < bits) { - int i; - - for (i = find_first_bit(buf, bits); - i < bits && ord > 0; - i = find_next_bit(buf, bits, i + 1)) - ord--; - if (i < bits && ord == 0) - pos = i; - } + for (pos = find_first_bit(buf, nbits); + pos < nbits && ord; + pos = find_next_bit(buf, nbits, pos + 1)) + ord--; return pos; } @@ -788,7 +668,7 @@ int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) * @src: subset to be remapped * @old: defines domain of map * @new: defines range of map - * @bits: number of bits in each of these bitmaps + * @nbits: number of bits in each of these bitmaps * * Let @old and @new define a mapping of bit positions, such that * whatever position is held by the n-th set bit in @old is mapped @@ -816,22 +696,22 @@ int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) */ void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, - int bits) + unsigned int nbits) { - int oldbit, w; + unsigned int oldbit, w; if (dst == src) /* following doesn't handle inplace remaps */ return; - bitmap_zero(dst, bits); + bitmap_zero(dst, nbits); - w = bitmap_weight(new, bits); - for_each_set_bit(oldbit, src, bits) { - int n = bitmap_pos_to_ord(old, oldbit, bits); + w = bitmap_weight(new, nbits); + for_each_set_bit(oldbit, src, nbits) { + int n = bitmap_pos_to_ord(old, oldbit, nbits); if (n < 0 || w == 0) set_bit(oldbit, dst); /* identity map */ else - set_bit(bitmap_ord_to_pos(new, n % w, bits), dst); + set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst); } } EXPORT_SYMBOL(bitmap_remap); @@ -975,9 +855,9 @@ EXPORT_SYMBOL(bitmap_bitremap); * All bits in @dst not set by the above rule are cleared. */ void bitmap_onto(unsigned long *dst, const unsigned long *orig, - const unsigned long *relmap, int bits) + const unsigned long *relmap, unsigned int bits) { - int n, m; /* same meaning as in above comment */ + unsigned int n, m; /* same meaning as in above comment */ if (dst == orig) /* following doesn't handle inplace mappings */ return; @@ -1008,22 +888,22 @@ EXPORT_SYMBOL(bitmap_onto); * @dst: resulting smaller bitmap * @orig: original larger bitmap * @sz: specified size - * @bits: number of bits in each of these bitmaps + * @nbits: number of bits in each of these bitmaps * * For each bit oldbit in @orig, set bit oldbit mod @sz in @dst. * Clear all other bits in @dst. See further the comment and * Example [2] for bitmap_onto() for why and how to use this. */ void bitmap_fold(unsigned long *dst, const unsigned long *orig, - int sz, int bits) + unsigned int sz, unsigned int nbits) { - int oldbit; + unsigned int oldbit; if (dst == orig) /* following doesn't handle inplace mappings */ return; - bitmap_zero(dst, bits); + bitmap_zero(dst, nbits); - for_each_set_bit(oldbit, orig, bits) + for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } EXPORT_SYMBOL(bitmap_fold); @@ -1176,16 +1056,17 @@ EXPORT_SYMBOL(bitmap_allocate_region); * * Require nbits % BITS_PER_LONG == 0. */ -void bitmap_copy_le(void *dst, const unsigned long *src, int nbits) +#ifdef __BIG_ENDIAN +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned long *d = dst; - int i; + unsigned int i; for (i = 0; i < nbits/BITS_PER_LONG; i++) { if (BITS_PER_LONG == 64) - d[i] = cpu_to_le64(src[i]); + dst[i] = cpu_to_le64(src[i]); else - d[i] = cpu_to_le32(src[i]); + dst[i] = cpu_to_le32(src[i]); } } EXPORT_SYMBOL(bitmap_copy_le); +#endif diff --git a/lib/bitrev.c b/lib/bitrev.c index 3956203456d4..40ffda94cc5d 100644 --- a/lib/bitrev.c +++ b/lib/bitrev.c @@ -1,3 +1,4 @@ +#ifndef CONFIG_HAVE_ARCH_BITREVERSE #include <linux/types.h> #include <linux/module.h> #include <linux/bitrev.h> @@ -42,18 +43,4 @@ const u8 byte_rev_table[256] = { }; EXPORT_SYMBOL_GPL(byte_rev_table); -u16 bitrev16(u16 x) -{ - return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8); -} -EXPORT_SYMBOL(bitrev16); - -/** - * bitrev32 - reverse the order of bits in a u32 value - * @x: value to be bit-reversed - */ -u32 bitrev32(u32 x) -{ - return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16); -} -EXPORT_SYMBOL(bitrev32); +#endif /* CONFIG_HAVE_ARCH_BITREVERSE */ diff --git a/lib/bug.c b/lib/bug.c index d1d7c7878900..0c3bd9552b6f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -64,16 +64,22 @@ static LIST_HEAD(module_bug_list); static const struct bug_entry *module_find_bug(unsigned long bugaddr) { struct module *mod; + const struct bug_entry *bug = NULL; - list_for_each_entry(mod, &module_bug_list, bug_list) { - const struct bug_entry *bug = mod->bug_table; + rcu_read_lock(); + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { unsigned i; + bug = mod->bug_table; for (i = 0; i < mod->num_bugs; ++i, ++bug) if (bugaddr == bug_addr(bug)) - return bug; + goto out; } - return NULL; + bug = NULL; +out: + rcu_read_unlock(); + + return bug; } void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, @@ -99,13 +105,15 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * Strictly speaking this should have a spinlock to protect against * traversals, but since we only traverse on BUG()s, a spinlock * could potentially lead to deadlock and thus be counter-productive. + * Thus, this uses RCU to safely manipulate the bug list, since BUG + * must run in non-interruptive state. */ - list_add(&mod->bug_list, &module_bug_list); + list_add_rcu(&mod->bug_list, &module_bug_list); } void module_bug_cleanup(struct module *mod) { - list_del(&mod->bug_list); + list_del_rcu(&mod->bug_list); } #else diff --git a/lib/checksum.c b/lib/checksum.c index 129775eb6de6..8b39e86dbab5 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -181,6 +181,15 @@ csum_partial_copy(const void *src, void *dst, int len, __wsum sum) EXPORT_SYMBOL(csum_partial_copy); #ifndef csum_tcpudp_nofold +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, @@ -195,8 +204,7 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, #else s += (proto + len) << 8; #endif - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); } EXPORT_SYMBOL(csum_tcpudp_nofold); #endif diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 4f134d8907a7..f610b2a10b3e 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -191,7 +191,7 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, /* Update distances based on topology */ for_each_cpu(cpu, update_mask) { if (cpu_rmap_copy_neigh(rmap, cpu, - topology_thread_cpumask(cpu), 1)) + topology_sibling_cpumask(cpu), 1)) continue; if (cpu_rmap_copy_neigh(rmap, cpu, topology_core_cpumask(cpu), 2)) diff --git a/lib/cpumask.c b/lib/cpumask.c index b6513a9f2892..5a70f6196f57 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,27 +5,6 @@ #include <linux/export.h> #include <linux/bootmem.h> -int __first_cpu(const cpumask_t *srcp) -{ - return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS)); -} -EXPORT_SYMBOL(__first_cpu); - -int __next_cpu(int n, const cpumask_t *srcp) -{ - return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1)); -} -EXPORT_SYMBOL(__next_cpu); - -#if NR_CPUS > 64 -int __next_cpu_nr(int n, const cpumask_t *srcp) -{ - return min_t(int, nr_cpu_ids, - find_next_bit(srcp->bits, nr_cpu_ids, n+1)); -} -EXPORT_SYMBOL(__next_cpu_nr); -#endif - /** * cpumask_next_and - get the next cpu in *src1p & *src2p * @n: the cpu prior to the place to search (ie. return will be > @n) @@ -89,13 +68,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) dump_stack(); } #endif - /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */ - if (*mask) { - unsigned char *ptr = (unsigned char *)cpumask_bits(*mask); - unsigned int tail; - tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long); - memset(ptr + cpumask_size() - tail, 0, tail); - } return *mask != NULL; } @@ -166,64 +138,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * + * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy + * @node: local numa_node * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. + * This function selects an online CPU according to a numa aware policy; + * local cpus are returned first, followed by non-local ones, then it + * wraps around. * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. + * It's not very efficient, but useful for setup. */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +unsigned int cpumask_local_spread(unsigned int i, int node) { - cpumask_var_t mask; int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (numa_node == -1 || !cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); + if (node == -1) { + for_each_cpu(cpu, cpu_online_mask) + if (i-- == 0) + return cpu; } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); + /* NUMA first. */ + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + if (i-- == 0) + return cpu; + + for_each_cpu(cpu, cpu_online_mask) { + /* Skip NUMA nodes, done above. */ + if (cpumask_test_cpu(cpu, cpumask_of_node(node))) + continue; + + if (i-- == 0) + return cpu; } } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; + BUG(); } -EXPORT_SYMBOL(cpumask_set_cpu_local_first); +EXPORT_SYMBOL(cpumask_local_spread); diff --git a/lib/decompress.c b/lib/decompress.c index 37f3c786348f..528ff932d8e4 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -44,8 +44,8 @@ struct compress_format { }; static const struct compress_format compressed_formats[] __initconst = { - { {037, 0213}, "gzip", gunzip }, - { {037, 0236}, "gzip", gunzip }, + { {0x1f, 0x8b}, "gzip", gunzip }, + { {0x1f, 0x9e}, "gzip", gunzip }, { {0x42, 0x5a}, "bzip2", bunzip2 }, { {0x5d, 0x00}, "lzma", unlzma }, { {0xfd, 0x37}, "xz", unxz }, diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 8290e0bef7ea..6dd0335ea61b 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -184,7 +184,7 @@ static int INIT get_next_block(struct bunzip_data *bd) if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; origPtr = get_bits(bd, 24); - if (origPtr > dbufSize) + if (origPtr >= dbufSize) return RETVAL_DATA_ERROR; /* mapping table: if some byte values are never used (encoding things like ascii text), the compression code removes the gaps to have fewer diff --git a/lib/devres.c b/lib/devres.c index f4a195a6efe4..fbe2aac522e6 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -23,7 +23,7 @@ static int devm_ioremap_match(struct device *dev, void *res, void *match_data) * Managed ioremap(). Map is automatically unmapped on driver detach. */ void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, - unsigned long size) + resource_size_t size) { void __iomem **ptr, *addr; @@ -52,7 +52,7 @@ EXPORT_SYMBOL(devm_ioremap); * detach. */ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, - unsigned long size) + resource_size_t size) { void __iomem **ptr, *addr; @@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, EXPORT_SYMBOL(devm_ioremap_nocache); /** + * devm_ioremap_wc - Managed ioremap_wc() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * + * Managed ioremap_wc(). Map is automatically unmapped on driver detach. + */ +void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, + resource_size_t size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_wc(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_wc); + +/** * devm_iounmap - Managed iounmap() * @dev: Generic device to unmap for * @addr: Address to unmap diff --git a/lib/dma-debug.c b/lib/dma-debug.c index add80cc02dbe..ae4b65e17e64 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -102,6 +102,14 @@ static DEFINE_SPINLOCK(free_entries_lock); /* Global disable flag - will be set in case of an error */ static u32 global_disable __read_mostly; +/* Early initialization disable flag, set at the end of dma_debug_init */ +static bool dma_debug_initialized __read_mostly; + +static inline bool dma_debug_disabled(void) +{ + return global_disable || !dma_debug_initialized; +} + /* Global error count */ static u32 error_count; @@ -353,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, unsigned int range = 0; while (range <= max_range) { - entry = __hash_bucket_find(*bucket, &index, containing_match); + entry = __hash_bucket_find(*bucket, ref, containing_match); if (entry) return entry; @@ -945,7 +953,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti struct dma_debug_entry *uninitialized_var(entry); int count; - if (global_disable) + if (dma_debug_disabled()) return 0; switch (action) { @@ -973,7 +981,7 @@ void dma_debug_add_bus(struct bus_type *bus) { struct notifier_block *nb; - if (global_disable) + if (dma_debug_disabled()) return; nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); @@ -994,6 +1002,9 @@ void dma_debug_init(u32 num_entries) { int i; + /* Do not use dma_debug_initialized here, since we really want to be + * called to set dma_debug_initialized + */ if (global_disable) return; @@ -1021,6 +1032,8 @@ void dma_debug_init(u32 num_entries) nr_total_entries = num_free_entries; + dma_debug_initialized = true; + pr_info("DMA-API: debugging enabled by kernel config\n"); } @@ -1243,7 +1256,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, { struct dma_debug_entry *entry; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; if (dma_mapping_error(dev, dma_addr)) @@ -1283,7 +1296,7 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) struct hash_bucket *bucket; unsigned long flags; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; ref.dev = dev; @@ -1325,7 +1338,7 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, .direction = direction, }; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; if (map_single) @@ -1342,7 +1355,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, struct scatterlist *s; int i; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; for_each_sg(sg, s, mapped_ents, i) { @@ -1395,7 +1408,7 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; int mapped_ents = 0, i; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; for_each_sg(sglist, s, nelems, i) { @@ -1427,7 +1440,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, { struct dma_debug_entry *entry; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; if (unlikely(virt == NULL)) @@ -1462,7 +1475,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, .direction = DMA_BIDIRECTIONAL, }; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; check_unmap(&ref); @@ -1474,7 +1487,7 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, { struct dma_debug_entry ref; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; ref.type = dma_debug_single; @@ -1494,7 +1507,7 @@ void debug_dma_sync_single_for_device(struct device *dev, { struct dma_debug_entry ref; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; ref.type = dma_debug_single; @@ -1515,7 +1528,7 @@ void debug_dma_sync_single_range_for_cpu(struct device *dev, { struct dma_debug_entry ref; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; ref.type = dma_debug_single; @@ -1536,7 +1549,7 @@ void debug_dma_sync_single_range_for_device(struct device *dev, { struct dma_debug_entry ref; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; ref.type = dma_debug_single; @@ -1556,7 +1569,7 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct scatterlist *s; int mapped_ents = 0, i; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; for_each_sg(sg, s, nelems, i) { @@ -1589,7 +1602,7 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct scatterlist *s; int mapped_ents = 0, i; - if (unlikely(global_disable)) + if (unlikely(dma_debug_disabled())) return; for_each_sg(sg, s, nelems, i) { diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 96bc6a4b61d1..d8f3d3150603 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -576,7 +576,7 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor, } else { char buf[PREFIX_SIZE]; - dev_printk_emit(7, dev, "%s%s %s: %pV", + dev_printk_emit(LOGLEVEL_DEBUG, dev, "%s%s %s: %pV", dynamic_emit_prefix(descriptor, buf), dev_driver_string(dev), dev_name(dev), &vaf); @@ -605,7 +605,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, if (dev && dev->dev.parent) { char buf[PREFIX_SIZE]; - dev_printk_emit(7, dev->dev.parent, + dev_printk_emit(LOGLEVEL_DEBUG, dev->dev.parent, "%s%s %s %s%s: %pV", dynamic_emit_prefix(descriptor, buf), dev_driver_string(dev->dev.parent), diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index 0777c5a45fa0..f346715e2255 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -3,12 +3,12 @@ * * Copyright (c) 2011, Tom Herbert <therbert@google.com> */ -#include <linux/module.h> #include <linux/types.h> -#include <linux/ctype.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/dynamic_queue_limits.h> +#include <linux/compiler.h> +#include <linux/export.h> #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index d7d501ea856d..f1cdeb024d17 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -40,10 +40,16 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->verbose > 0) - printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure\n"); - if (attr->verbose > 1) - dump_stack(); + if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { + printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" + "name %pd, interval %lu, probability %lu, " + "space %d, times %d\n", attr->dname, + attr->probability, attr->interval, + atomic_read(&attr->space), + atomic_read(&attr->times)); + if (attr->verbose > 1) + dump_stack(); + } } #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) @@ -202,6 +208,12 @@ struct dentry *fault_create_debugfs_attr(const char *name, goto fail; if (!debugfs_create_ul("verbose", mode, dir, &attr->verbose)) goto fail; + if (!debugfs_create_u32("verbose_ratelimit_interval_ms", mode, dir, + &attr->ratelimit_state.interval)) + goto fail; + if (!debugfs_create_u32("verbose_ratelimit_burst", mode, dir, + &attr->ratelimit_state.burst)) + goto fail; if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter)) goto fail; @@ -222,6 +234,7 @@ struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + attr->dname = dget(dir); return dir; fail: debugfs_remove_recursive(dir); diff --git a/lib/find_bit.c b/lib/find_bit.c new file mode 100644 index 000000000000..18072ea9c20e --- /dev/null +++ b/lib/find_bit.c @@ -0,0 +1,193 @@ +/* bit search implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * Copyright (C) 2008 IBM Corporation + * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au> + * (Inspired by David Howell's find_next_bit implementation) + * + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease + * size and improve performance, 2015. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/bitmap.h> +#include <linux/export.h> +#include <linux/kernel.h> + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) + +/* + * This is a common helper function for find_next_bit and + * find_next_zero_bit. The difference is the "invert" argument, which + * is XORed with each fetched word before searching it for one bits. + */ +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= BITMAP_FIRST_WORD_MASK(start); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + } + + return min(start + __ffs(tmp), nbits); +} +#endif + +#ifndef find_next_bit +/* + * Find the next set bit in a memory region. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_bit); +#endif + +#ifndef find_next_zero_bit +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, ~0UL); +} +EXPORT_SYMBOL(find_next_zero_bit); +#endif + +#ifndef find_first_bit +/* + * Find the first set bit in a memory region. + */ +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx]) + return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); + } + + return size; +} +EXPORT_SYMBOL(find_first_bit); +#endif + +#ifndef find_first_zero_bit +/* + * Find the first cleared bit in a memory region. + */ +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx] != ~0UL) + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); + } + + return size; +} +EXPORT_SYMBOL(find_first_zero_bit); +#endif + +#ifndef find_last_bit +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (size) { + unsigned long val = BITMAP_LAST_WORD_MASK(size); + unsigned long idx = (size-1) / BITS_PER_LONG; + + do { + val &= addr[idx]; + if (val) + return idx * BITS_PER_LONG + __fls(val); + + val = ~0ul; + } while (idx--); + } + return size; +} +EXPORT_SYMBOL(find_last_bit); +#endif + +#ifdef __BIG_ENDIAN + +/* include/linux/byteorder does not support "unsigned long" type */ +static inline unsigned long ext2_swab(const unsigned long y) +{ +#if BITS_PER_LONG == 64 + return (unsigned long) __swab64((u64) y); +#elif BITS_PER_LONG == 32 + return (unsigned long) __swab32((u32) y); +#else +#error BITS_PER_LONG not defined +#endif +} + +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) +static unsigned long _find_next_bit_le(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + } + + return min(start + __ffs(ext2_swab(tmp)), nbits); +} +#endif + +#ifndef find_next_zero_bit_le +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + return _find_next_bit_le(addr, size, offset, ~0UL); +} +EXPORT_SYMBOL(find_next_zero_bit_le); +#endif + +#ifndef find_next_bit_le +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + return _find_next_bit_le(addr, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_bit_le); +#endif + +#endif /* __BIG_ENDIAN */ diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c deleted file mode 100644 index 91ca09fbf6f9..000000000000 --- a/lib/find_last_bit.c +++ /dev/null @@ -1,49 +0,0 @@ -/* find_last_bit.c: fallback find next bit implementation - * - * Copyright (C) 2008 IBM Corporation - * Written by Rusty Russell <rusty@rustcorp.com.au> - * (Inspired by David Howell's find_next_bit implementation) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/bitops.h> -#include <linux/export.h> -#include <asm/types.h> -#include <asm/byteorder.h> - -#ifndef find_last_bit - -unsigned long find_last_bit(const unsigned long *addr, unsigned long size) -{ - unsigned long words; - unsigned long tmp; - - /* Start at final word. */ - words = size / BITS_PER_LONG; - - /* Partial final word? */ - if (size & (BITS_PER_LONG-1)) { - tmp = (addr[words] & (~0UL >> (BITS_PER_LONG - - (size & (BITS_PER_LONG-1))))); - if (tmp) - goto found; - } - - while (words) { - tmp = addr[--words]; - if (tmp) { -found: - return words * BITS_PER_LONG + __fls(tmp); - } - } - - /* Not found */ - return size; -} -EXPORT_SYMBOL(find_last_bit); - -#endif diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c deleted file mode 100644 index 0cbfc0b4398f..000000000000 --- a/lib/find_next_bit.c +++ /dev/null @@ -1,285 +0,0 @@ -/* find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/bitops.h> -#include <linux/export.h> -#include <asm/types.h> -#include <asm/byteorder.h> - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -#ifndef find_next_bit -/* - * Find the next set bit in a memory region. - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); -} -EXPORT_SYMBOL(find_next_bit); -#endif - -#ifndef find_next_zero_bit -/* - * This implementation of find_{first,next}_zero_bit was stolen from - * Linus' asm-alpha/bitops.h. - */ -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (BITS_PER_LONG - offset); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + ffz(tmp); -} -EXPORT_SYMBOL(find_next_zero_bit); -#endif - -#ifndef find_first_bit -/* - * Find the first set bit in a memory region. - */ -unsigned long find_first_bit(const unsigned long *addr, unsigned long size) -{ - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; - - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found: - return result + __ffs(tmp); -} -EXPORT_SYMBOL(find_first_bit); -#endif - -#ifndef find_first_zero_bit -/* - * Find the first cleared bit in a memory region. - */ -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; - - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - - tmp = (*p) | (~0UL << size); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found: - return result + ffz(tmp); -} -EXPORT_SYMBOL(find_first_zero_bit); -#endif - -#ifdef __BIG_ENDIAN - -/* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swabp(const unsigned long * x) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64p((u64 *) x); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32p((u32 *) x); -#else -#error BITS_PER_LONG not defined -#endif -} - -/* include/linux/byteorder doesn't support "unsigned long" type */ -static inline unsigned long ext2_swab(const unsigned long y) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64((u64) y); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32((u32) y); -#else -#error BITS_PER_LONG not defined -#endif -} - -#ifndef find_next_zero_bit_le -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp |= (~0UL >> (BITS_PER_LONG - offset)); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - - while (size & ~(BITS_PER_LONG - 1)) { - if (~(tmp = *(p++))) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. Skip ffz */ -found_middle: - return result + ffz(tmp); - -found_middle_swap: - return result + ffz(ext2_swab(tmp)); -} -EXPORT_SYMBOL(find_next_zero_bit_le); -#endif - -#ifndef find_next_bit_le -unsigned long find_next_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - - while (size & ~(BITS_PER_LONG - 1)) { - tmp = *(p++); - if (tmp) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); - -found_middle_swap: - return result + __ffs(ext2_swab(tmp)); -} -EXPORT_SYMBOL(find_next_bit_le); -#endif - -#endif /* __BIG_ENDIAN */ diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 71fcfcd96410..d83a372fa76f 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -109,7 +109,7 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32table_le[%d][%d] = {", LE_TABLE_ROWS, LE_TABLE_SIZE); output_table(crc32table_le, LE_TABLE_ROWS, @@ -119,7 +119,7 @@ int main(int argc, char** argv) if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32table_be[%d][%d] = {", BE_TABLE_ROWS, BE_TABLE_SIZE); output_table(crc32table_be, LE_TABLE_ROWS, @@ -128,7 +128,7 @@ int main(int argc, char** argv) } if (CRC_LE_BITS > 1) { crc32cinit_le(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32ctable_le[%d][%d] = {", LE_TABLE_ROWS, LE_TABLE_SIZE); output_table(crc32ctable_le, LE_TABLE_ROWS, diff --git a/lib/genalloc.c b/lib/genalloc.c index cce4dd68c40d..d214866eeea2 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -34,7 +34,6 @@ #include <linux/rculist.h> #include <linux/interrupt.h> #include <linux/genalloc.h> -#include <linux/of_address.h> #include <linux/of_device.h> static inline size_t chunk_size(const struct gen_pool_chunk *chunk) @@ -415,7 +414,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, size_t size) { bool found = false; - unsigned long end = start + size; + unsigned long end = start + size - 1; struct gen_pool_chunk *chunk; rcu_read_lock(); @@ -587,6 +586,8 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, struct gen_pool **ptr, *pool; ptr = devres_alloc(devm_gen_pool_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; pool = gen_pool_create(min_alloc_order, nid); if (pool) { @@ -598,6 +599,7 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, return pool; } +EXPORT_SYMBOL(devm_gen_pool_create); /** * dev_get_gen_pool - Obtain the gen_pool (if any) for a device diff --git a/lib/halfmd4.c b/lib/halfmd4.c index 66d0ee8b7776..a8fe6274a13c 100644 --- a/lib/halfmd4.c +++ b/lib/halfmd4.c @@ -1,4 +1,4 @@ -#include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/export.h> #include <linux/cryptohash.h> diff --git a/lib/hash.c b/lib/hash.c deleted file mode 100644 index fea973f4bd57..000000000000 --- a/lib/hash.c +++ /dev/null @@ -1,39 +0,0 @@ -/* General purpose hashing library - * - * That's a start of a kernel hashing library, which can be extended - * with further algorithms in future. arch_fast_hash{2,}() will - * eventually resolve to an architecture optimized implementation. - * - * Copyright 2013 Francesco Fusco <ffusco@redhat.com> - * Copyright 2013 Daniel Borkmann <dborkman@redhat.com> - * Copyright 2013 Thomas Graf <tgraf@redhat.com> - * Licensed under the GNU General Public License, version 2.0 (GPLv2) - */ - -#include <linux/jhash.h> -#include <linux/hash.h> -#include <linux/cache.h> - -static struct fast_hash_ops arch_hash_ops __read_mostly = { - .hash = jhash, - .hash2 = jhash2, -}; - -u32 arch_fast_hash(const void *data, u32 len, u32 seed) -{ - return arch_hash_ops.hash(data, len, seed); -} -EXPORT_SYMBOL_GPL(arch_fast_hash); - -u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) -{ - return arch_hash_ops.hash2(data, len, seed); -} -EXPORT_SYMBOL_GPL(arch_fast_hash2); - -static int __init hashlib_init(void) -{ - setup_arch_fast_hash(&arch_hash_ops); - return 0; -} -early_initcall(hashlib_init); diff --git a/lib/hexdump.c b/lib/hexdump.c index 270773b91923..7ea09699855d 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -97,63 +97,79 @@ EXPORT_SYMBOL(bin2hex); * * example output buffer: * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO + * + * Return: + * The amount of bytes placed in the buffer without terminating NUL. If the + * output was truncated, then the return value is the number of bytes + * (excluding the terminating NUL) which would have been written to the final + * string if enough space had been available. */ -void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, - int groupsize, char *linebuf, size_t linebuflen, - bool ascii) +int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, + char *linebuf, size_t linebuflen, bool ascii) { const u8 *ptr = buf; + int ngroups; u8 ch; int j, lx = 0; int ascii_column; + int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; - if (!len) - goto nil; if (len > rowsize) /* limit to one line at a time */ len = rowsize; + if (!is_power_of_2(groupsize) || groupsize > 8) + groupsize = 1; if ((len % groupsize) != 0) /* no mixed size output */ groupsize = 1; - switch (groupsize) { - case 8: { - const u64 *ptr8 = buf; - int ngroups = len / groupsize; + ngroups = len / groupsize; + ascii_column = rowsize * 2 + rowsize / groupsize + 1; - for (j = 0; j < ngroups; j++) - lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); - ascii_column = 17 * ngroups + 2; - break; - } + if (!linebuflen) + goto overflow1; - case 4: { - const u32 *ptr4 = buf; - int ngroups = len / groupsize; + if (!len) + goto nil; - for (j = 0; j < ngroups; j++) - lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); - ascii_column = 9 * ngroups + 2; - break; - } + if (groupsize == 8) { + const u64 *ptr8 = buf; - case 2: { - const u16 *ptr2 = buf; - int ngroups = len / groupsize; + for (j = 0; j < ngroups; j++) { + ret = snprintf(linebuf + lx, linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + if (ret >= linebuflen - lx) + goto overflow1; + lx += ret; + } + } else if (groupsize == 4) { + const u32 *ptr4 = buf; - for (j = 0; j < ngroups; j++) - lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); - ascii_column = 5 * ngroups + 2; - break; - } + for (j = 0; j < ngroups; j++) { + ret = snprintf(linebuf + lx, linebuflen - lx, + "%s%8.8x", j ? " " : "", + *(ptr4 + j)); + if (ret >= linebuflen - lx) + goto overflow1; + lx += ret; + } + } else if (groupsize == 2) { + const u16 *ptr2 = buf; - default: - for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + for (j = 0; j < ngroups; j++) { + ret = snprintf(linebuf + lx, linebuflen - lx, + "%s%4.4x", j ? " " : "", + *(ptr2 + j)); + if (ret >= linebuflen - lx) + goto overflow1; + lx += ret; + } + } else { + for (j = 0; j < len; j++) { + if (linebuflen < lx + 3) + goto overflow2; ch = ptr[j]; linebuf[lx++] = hex_asc_hi(ch); linebuf[lx++] = hex_asc_lo(ch); @@ -161,21 +177,28 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, } if (j) lx--; - - ascii_column = 3 * rowsize + 2; - break; } if (!ascii) goto nil; - while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + while (lx < ascii_column) { + if (linebuflen < lx + 2) + goto overflow2; linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) { + } + for (j = 0; j < len; j++) { + if (linebuflen < lx + 2) + goto overflow2; ch = ptr[j]; linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; } nil: + linebuf[lx] = '\0'; + return lx; +overflow2: linebuf[lx++] = '\0'; +overflow1: + return ascii ? ascii_column + len : (groupsize * 2 + 1) * ngroups - 1; } EXPORT_SYMBOL(hex_dump_to_buffer); diff --git a/lib/idr.c b/lib/idr.c index e654aebd5f80..5335c43adf46 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -30,7 +30,6 @@ #include <linux/idr.h> #include <linux/spinlock.h> #include <linux/percpu.h> -#include <linux/hardirq.h> #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) diff --git a/lib/interval_tree.c b/lib/interval_tree.c index f367f9ad544c..c85f6600a5f8 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,7 +1,7 @@ -#include <linux/init.h> #include <linux/interval_tree.h> #include <linux/interval_tree_generic.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> #define START(node) ((node)->start) #define LAST(node) ((node)->last) diff --git a/lib/iommu-common.c b/lib/iommu-common.c new file mode 100644 index 000000000000..df30632f0bef --- /dev/null +++ b/lib/iommu-common.c @@ -0,0 +1,270 @@ +/* + * IOMMU mmap management and range allocation functions. + * Based almost entirely upon the powerpc iommu allocator. + */ + +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/iommu-helper.h> +#include <linux/iommu-common.h> +#include <linux/dma-mapping.h> +#include <linux/hash.h> + +#ifndef DMA_ERROR_CODE +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) +#endif + +static unsigned long iommu_large_alloc = 15; + +static DEFINE_PER_CPU(unsigned int, iommu_hash_common); + +static inline bool need_flush(struct iommu_map_table *iommu) +{ + return (iommu->lazy_flush != NULL && + (iommu->flags & IOMMU_NEED_FLUSH) != 0); +} + +static inline void set_flush(struct iommu_map_table *iommu) +{ + iommu->flags |= IOMMU_NEED_FLUSH; +} + +static inline void clear_flush(struct iommu_map_table *iommu) +{ + iommu->flags &= ~IOMMU_NEED_FLUSH; +} + +static void setup_iommu_pool_hash(void) +{ + unsigned int i; + static bool do_once; + + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + +/* + * Initialize iommu_pool entries for the iommu_map_table. `num_entries' + * is the number of table entries. If `large_pool' is set to true, + * the top 1/4 of the table will be set aside for pool allocations + * of more than iommu_large_alloc pages. + */ +void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check) +{ + unsigned int start, i; + struct iommu_pool *p = &(iommu->large_pool); + + setup_iommu_pool_hash(); + if (npools == 0) + iommu->nr_pools = IOMMU_NR_POOLS; + else + iommu->nr_pools = npools; + BUG_ON(npools > IOMMU_NR_POOLS); + + iommu->table_shift = table_shift; + iommu->lazy_flush = lazy_flush; + start = 0; + if (skip_span_boundary_check) + iommu->flags |= IOMMU_NO_SPAN_BOUND; + if (large_pool) + iommu->flags |= IOMMU_HAS_LARGE_POOL; + + if (!large_pool) + iommu->poolsize = num_entries/iommu->nr_pools; + else + iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; + for (i = 0; i < iommu->nr_pools; i++) { + spin_lock_init(&(iommu->pools[i].lock)); + iommu->pools[i].start = start; + iommu->pools[i].hint = start; + start += iommu->poolsize; /* start for next pool */ + iommu->pools[i].end = start - 1; + } + if (!large_pool) + return; + /* initialize large_pool */ + spin_lock_init(&(p->lock)); + p->start = start; + p->hint = p->start; + p->end = num_entries; +} +EXPORT_SYMBOL(iommu_tbl_pool_init); + +unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order) +{ + unsigned int pool_hash = __this_cpu_read(iommu_hash_common); + unsigned long n, end, start, limit, boundary_size; + struct iommu_pool *pool; + int pass = 0; + unsigned int pool_nr; + unsigned int npools = iommu->nr_pools; + unsigned long flags; + bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); + bool largealloc = (large_pool && npages > iommu_large_alloc); + unsigned long shift; + unsigned long align_mask = 0; + + if (align_order > 0) + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* Sanity check */ + if (unlikely(npages == 0)) { + WARN_ON_ONCE(1); + return DMA_ERROR_CODE; + } + + if (largealloc) { + pool = &(iommu->large_pool); + pool_nr = 0; /* to keep compiler happy */ + } else { + /* pick out pool_nr */ + pool_nr = pool_hash & (npools - 1); + pool = &(iommu->pools[pool_nr]); + } + spin_lock_irqsave(&pool->lock, flags); + + again: + if (pass == 0 && handle && *handle && + (*handle >= pool->start) && (*handle < pool->end)) + start = *handle; + else + start = pool->hint; + + limit = pool->end; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning. If a + * flush is needed, it will get done based on the return value + * from iommu_area_alloc() below. + */ + if (start >= limit) + start = pool->start; + shift = iommu->table_map_base >> iommu->table_shift; + if (limit + shift > mask) { + limit = mask - shift + 1; + /* If we're constrained on address range, first try + * at the masked hint to avoid O(n) search complexity, + * but on second pass, start at 0 in pool 0. + */ + if ((start & mask) >= limit || pass > 0) { + spin_unlock(&(pool->lock)); + pool = &(iommu->pools[0]); + spin_lock(&(pool->lock)); + start = pool->start; + } else { + start &= mask; + } + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << iommu->table_shift); + else + boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); + + boundary_size = boundary_size >> iommu->table_shift; + /* + * if the skip_span_boundary_check had been set during init, we set + * things up so that iommu_is_span_boundary() merely checks if the + * (index + npages) < num_tsb_entries + */ + if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { + shift = 0; + boundary_size = iommu->poolsize * iommu->nr_pools; + } + n = iommu_area_alloc(iommu->map, limit, start, npages, shift, + boundary_size, align_mask); + if (n == -1) { + if (likely(pass == 0)) { + /* First failure, rescan from the beginning. */ + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else if (!largealloc && pass <= iommu->nr_pools) { + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + pool = &(iommu->pools[pool_nr]); + spin_lock(&(pool->lock)); + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else { + /* give up */ + n = DMA_ERROR_CODE; + goto bail; + } + } + if (n < pool->hint || need_flush(iommu)) { + clear_flush(iommu); + iommu->lazy_flush(iommu); + } + + end = n + npages; + pool->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; +bail: + spin_unlock_irqrestore(&(pool->lock), flags); + + return n; +} +EXPORT_SYMBOL(iommu_tbl_range_alloc); + +static struct iommu_pool *get_pool(struct iommu_map_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); + + /* The large pool is the last pool at the top of the table */ + if (large_pool && entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr >= tbl->nr_pools); + p = &tbl->pools[pool_nr]; + } + return p; +} + +/* Caller supplies the index of the entry into the iommu map table + * itself when the mapping from dma_addr to the entry is not the + * default addr->entry mapping below. + */ +void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, + unsigned long npages, unsigned long entry) +{ + struct iommu_pool *pool; + unsigned long flags; + unsigned long shift = iommu->table_shift; + + if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ + entry = (dma_addr - iommu->table_map_base) >> shift; + pool = get_pool(iommu, entry); + + spin_lock_irqsave(&(pool->lock), flags); + bitmap_clear(iommu->map, entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); +} +EXPORT_SYMBOL(iommu_tbl_range_free); diff --git a/lib/ioremap.c b/lib/ioremap.c index 0c9216c48762..86c8911b0e3a 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,6 +13,43 @@ #include <asm/cacheflush.h> #include <asm/pgtable.h> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_pud_capable; +static int __read_mostly ioremap_pmd_capable; +static int __read_mostly ioremap_huge_disabled; + +static int __init set_nohugeiomap(char *str) +{ + ioremap_huge_disabled = 1; + return 0; +} +early_param("nohugeiomap", set_nohugeiomap); + +void __init ioremap_huge_init(void) +{ + if (!ioremap_huge_disabled) { + if (arch_ioremap_pud_supported()) + ioremap_pud_capable = 1; + if (arch_ioremap_pmd_supported()) + ioremap_pmd_capable = 1; + } +} + +static inline int ioremap_pud_enabled(void) +{ + return ioremap_pud_capable; +} + +static inline int ioremap_pmd_enabled(void) +{ + return ioremap_pmd_capable; +} + +#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_pud_enabled(void) { return 0; } +static inline int ioremap_pmd_enabled(void) { return 0; } +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ + static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -43,6 +80,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return -ENOMEM; do { next = pmd_addr_end(addr, end); + + if (ioremap_pmd_enabled() && + ((next - addr) == PMD_SIZE) && + IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { + if (pmd_set_huge(pmd, phys_addr + addr, prot)) + continue; + } + if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) return -ENOMEM; } while (pmd++, addr = next, addr != end); @@ -61,6 +106,14 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, return -ENOMEM; do { next = pud_addr_end(addr, end); + + if (ioremap_pud_enabled() && + ((next - addr) == PUD_SIZE) && + IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { + if (pud_set_huge(pud, phys_addr + addr, prot)) + continue; + } + if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) return -ENOMEM; } while (pud++, addr = next, addr != end); diff --git a/lib/iov_iter.c b/lib/iov_iter.c new file mode 100644 index 000000000000..75232ad0a5e7 --- /dev/null +++ b/lib/iov_iter.c @@ -0,0 +1,851 @@ +#include <linux/export.h> +#include <linux/uio.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <net/checksum.h> + +#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ + size_t left; \ + size_t wanted = n; \ + __p = i->iov; \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + } else { \ + left = 0; \ + } \ + while (unlikely(!left && n)) { \ + __p++; \ + __v.iov_len = min(n, __p->iov_len); \ + if (unlikely(!__v.iov_len)) \ + continue; \ + __v.iov_base = __p->iov_base; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip = __v.iov_len; \ + n -= __v.iov_len; \ + } \ + n = wanted - n; \ +} + +#define iterate_kvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->kvec; \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + (void)(STEP); \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + } \ + while (unlikely(n)) { \ + __p++; \ + __v.iov_len = min(n, __p->iov_len); \ + if (unlikely(!__v.iov_len)) \ + continue; \ + __v.iov_base = __p->iov_base; \ + (void)(STEP); \ + skip = __v.iov_len; \ + n -= __v.iov_len; \ + } \ + n = wanted; \ +} + +#define iterate_bvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->bvec; \ + __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \ + if (likely(__v.bv_len)) { \ + __v.bv_page = __p->bv_page; \ + __v.bv_offset = __p->bv_offset + skip; \ + (void)(STEP); \ + skip += __v.bv_len; \ + n -= __v.bv_len; \ + } \ + while (unlikely(n)) { \ + __p++; \ + __v.bv_len = min_t(size_t, n, __p->bv_len); \ + if (unlikely(!__v.bv_len)) \ + continue; \ + __v.bv_page = __p->bv_page; \ + __v.bv_offset = __p->bv_offset; \ + (void)(STEP); \ + skip = __v.bv_len; \ + n -= __v.bv_len; \ + } \ + n = wanted; \ +} + +#define iterate_all_kinds(i, n, v, I, B, K) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec *bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + } \ +} + +#define iterate_and_advance(i, n, v, I, B, K) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec *bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + if (skip == bvec->bv_len) { \ + bvec++; \ + skip = 0; \ + } \ + i->nr_segs -= bvec - i->bvec; \ + i->bvec = bvec; \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + if (skip == kvec->iov_len) { \ + kvec++; \ + skip = 0; \ + } \ + i->nr_segs -= kvec - i->kvec; \ + i->kvec = kvec; \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + if (skip == iov->iov_len) { \ + iov++; \ + skip = 0; \ + } \ + i->nr_segs -= iov - i->iov; \ + i->iov = iov; \ + } \ + i->count -= n; \ + i->iov_offset = skip; \ +} + +static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_writeable(buf, copy)) { + kaddr = kmap_atomic(page); + from = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = from - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + from = kaddr + offset; + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + kunmap(page); +done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_readable(buf, copy)) { + kaddr = kmap_atomic(page); + to = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = to - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + to = kaddr + offset; + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + kunmap(page); +done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +/* + * Fault in the first iovec of the given iov_iter, to a maximum length + * of bytes. Returns 0 on success, or non-zero if the memory could not be + * accessed (ie. because it is an invalid address). + * + * writev-intensive code may want this to prefault several iovecs -- that + * would be possible (callers must not rely on the fact that _only_ the + * first iovec will be faulted with the current implementation). + */ +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) +{ + if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + char __user *buf = i->iov->iov_base + i->iov_offset; + bytes = min(bytes, i->iov->iov_len - i->iov_offset); + return fault_in_pages_readable(buf, bytes); + } + return 0; +} +EXPORT_SYMBOL(iov_iter_fault_in_readable); + +/* + * Fault in one or more iovecs of the given iov_iter, to a maximum length of + * bytes. For each iovec, fault in each page that constitutes the iovec. + * + * Return 0 on success, or non-zero if the memory could not be accessed (i.e. + * because it is an invalid address). + */ +int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) +{ + size_t skip = i->iov_offset; + const struct iovec *iov; + int err; + struct iovec v; + + if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + iterate_iovec(i, bytes, v, iov, skip, ({ + err = fault_in_multipages_readable(v.iov_base, + v.iov_len); + if (unlikely(err)) + return err; + 0;})) + } + return 0; +} +EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); + +void iov_iter_init(struct iov_iter *i, int direction, + const struct iovec *iov, unsigned long nr_segs, + size_t count) +{ + /* It will get better. Eventually... */ + if (segment_eq(get_fs(), KERNEL_DS)) { + direction |= ITER_KVEC; + i->type = direction; + i->kvec = (struct kvec *)iov; + } else { + i->type = direction; + i->iov = iov; + } + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_init); + +static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len) +{ + char *to = kmap_atomic(page); + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + +static void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + char *from = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len), + memcpy_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len), + memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_to_iter); + +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, + v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_from_iter); + +size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_from_user_nocache((to += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_from_iter_nocache); + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & (ITER_BVEC|ITER_KVEC)) { + void *kaddr = kmap_atomic(page); + size_t wanted = copy_to_iter(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; + } else + return copy_page_to_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_to_iter); + +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & (ITER_BVEC|ITER_KVEC)) { + void *kaddr = kmap_atomic(page); + size_t wanted = copy_from_iter(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; + } else + return copy_page_from_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_from_iter); + +size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +{ + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __clear_user(v.iov_base, v.iov_len), + memzero_page(v.bv_page, v.bv_offset, v.bv_len), + memset(v.iov_base, 0, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(iov_iter_zero); + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr = kmap_atomic(page), *p = kaddr + offset; + iterate_all_kinds(i, bytes, v, + __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + kunmap_atomic(kaddr); + return bytes; +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +void iov_iter_advance(struct iov_iter *i, size_t size) +{ + iterate_and_advance(i, size, v, 0, 0, 0) +} +EXPORT_SYMBOL(iov_iter_advance); + +/* + * Return the count of just the current iov_iter segment. + */ +size_t iov_iter_single_seg_count(const struct iov_iter *i) +{ + if (i->nr_segs == 1) + return i->count; + else if (i->type & ITER_BVEC) + return min(i->count, i->bvec->bv_len - i->iov_offset); + else + return min(i->count, i->iov->iov_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +void iov_iter_kvec(struct iov_iter *i, int direction, + const struct kvec *kvec, unsigned long nr_segs, + size_t count) +{ + BUG_ON(!(direction & ITER_KVEC)); + i->type = direction; + i->kvec = kvec; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_kvec); + +void iov_iter_bvec(struct iov_iter *i, int direction, + const struct bio_vec *bvec, unsigned long nr_segs, + size_t count) +{ + BUG_ON(!(direction & ITER_BVEC)); + i->type = direction; + i->bvec = bvec; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_bvec); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + unsigned long res = 0; + size_t size = i->count; + + if (!size) + return 0; + + iterate_all_kinds(i, size, v, + (res |= (unsigned long)v.iov_base | v.iov_len, 0), + res |= v.bv_offset | v.bv_len, + res |= (unsigned long)v.iov_base | v.iov_len + ) + return res; +} +EXPORT_SYMBOL(iov_iter_alignment); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, unsigned maxpages, + size_t *start) +{ + if (maxsize > i->count) + maxsize = i->count; + + if (!maxsize) + return 0; + + iterate_all_kinds(i, maxsize, v, ({ + unsigned long addr = (unsigned long)v.iov_base; + size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); + int n; + int res; + + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; + addr &= ~(PAGE_SIZE - 1); + n = DIV_ROUND_UP(len, PAGE_SIZE); + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + if (unlikely(res < 0)) + return res; + return (res == n ? len : res * PAGE_SIZE) - *start; + 0;}),({ + /* can't be more than PAGE_SIZE */ + *start = v.bv_offset; + get_page(*pages = v.bv_page); + return v.bv_len; + }),({ + return -EFAULT; + }) + ) + return 0; +} +EXPORT_SYMBOL(iov_iter_get_pages); + +static struct page **get_pages_array(size_t n) +{ + struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); + if (!p) + p = vmalloc(n * sizeof(struct page *)); + return p; +} + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + struct page **p; + + if (maxsize > i->count) + maxsize = i->count; + + if (!maxsize) + return 0; + + iterate_all_kinds(i, maxsize, v, ({ + unsigned long addr = (unsigned long)v.iov_base; + size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); + int n; + int res; + + addr &= ~(PAGE_SIZE - 1); + n = DIV_ROUND_UP(len, PAGE_SIZE); + p = get_pages_array(n); + if (!p) + return -ENOMEM; + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + if (unlikely(res < 0)) { + kvfree(p); + return res; + } + *pages = p; + return (res == n ? len : res * PAGE_SIZE) - *start; + 0;}),({ + /* can't be more than PAGE_SIZE */ + *start = v.bv_offset; + *pages = p = get_pages_array(1); + if (!p) + return -ENOMEM; + get_page(*p = v.bv_page); + return v.bv_len; + }),({ + return -EFAULT; + }) + ) + return 0; +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + char *to = addr; + __wsum sum, next; + size_t off = 0; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + sum = *csum; + iterate_and_advance(i, bytes, v, ({ + int err = 0; + next = csum_and_copy_from_user(v.iov_base, + (to += v.iov_len) - v.iov_len, + v.iov_len, 0, &err); + if (!err) { + sum = csum_block_add(sum, next, off); + off += v.iov_len; + } + err ? v.iov_len : 0; + }), ({ + char *p = kmap_atomic(v.bv_page); + next = csum_partial_copy_nocheck(p + v.bv_offset, + (to += v.bv_len) - v.bv_len, + v.bv_len, 0); + kunmap_atomic(p); + sum = csum_block_add(sum, next, off); + off += v.bv_len; + }),({ + next = csum_partial_copy_nocheck(v.iov_base, + (to += v.iov_len) - v.iov_len, + v.iov_len, 0); + sum = csum_block_add(sum, next, off); + off += v.iov_len; + }) + ) + *csum = sum; + return bytes; +} +EXPORT_SYMBOL(csum_and_copy_from_iter); + +size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + char *from = addr; + __wsum sum, next; + size_t off = 0; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + sum = *csum; + iterate_and_advance(i, bytes, v, ({ + int err = 0; + next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, + v.iov_base, + v.iov_len, 0, &err); + if (!err) { + sum = csum_block_add(sum, next, off); + off += v.iov_len; + } + err ? v.iov_len : 0; + }), ({ + char *p = kmap_atomic(v.bv_page); + next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, + p + v.bv_offset, + v.bv_len, 0); + kunmap_atomic(p); + sum = csum_block_add(sum, next, off); + off += v.bv_len; + }),({ + next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, + v.iov_base, + v.iov_len, 0); + sum = csum_block_add(sum, next, off); + off += v.iov_len; + }) + ) + *csum = sum; + return bytes; +} +EXPORT_SYMBOL(csum_and_copy_to_iter); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + size_t size = i->count; + int npages = 0; + + if (!size) + return 0; + + iterate_all_kinds(i, size, v, ({ + unsigned long p = (unsigned long)v.iov_base; + npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) + - p / PAGE_SIZE; + if (npages >= maxpages) + return maxpages; + 0;}),({ + npages++; + if (npages >= maxpages) + return maxpages; + }),({ + unsigned long p = (unsigned long)v.iov_base; + npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) + - p / PAGE_SIZE; + if (npages >= maxpages) + return maxpages; + }) + ) + return npages; +} +EXPORT_SYMBOL(iov_iter_npages); + +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) +{ + *new = *old; + if (new->type & ITER_BVEC) + return new->bvec = kmemdup(new->bvec, + new->nr_segs * sizeof(struct bio_vec), + flags); + else + /* iovec and kvec have identical layout */ + return new->iov = kmemdup(new->iov, + new->nr_segs * sizeof(struct iovec), + flags); +} +EXPORT_SYMBOL(dup_iter); + +int import_iovec(int type, const struct iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) +{ + ssize_t n; + struct iovec *p; + n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, + *iov, &p); + if (n < 0) { + if (p != *iov) + kfree(p); + *iov = NULL; + return n; + } + iov_iter_init(i, type, p, nr_segs, n); + *iov = p == *iov ? NULL : p; + return 0; +} +EXPORT_SYMBOL(import_iovec); + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +int compat_import_iovec(int type, const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) +{ + ssize_t n; + struct iovec *p; + n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, + *iov, &p); + if (n < 0) { + if (p != *iov) + kfree(p); + *iov = NULL; + return n; + } + iov_iter_init(i, type, p, nr_segs, n); + *iov = p == *iov ? NULL : p; + return 0; +} +#endif + +int import_single_range(int rw, void __user *buf, size_t len, + struct iovec *iov, struct iov_iter *i) +{ + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; + if (unlikely(!access_ok(!rw, buf, len))) + return -EFAULT; + + iov->iov_base = buf; + iov->iov_len = len; + iov_iter_init(i, rw, iov, 1, len); + return 0; +} diff --git a/lib/iovec.c b/lib/iovec.c deleted file mode 100644 index df3abd1eaa4a..000000000000 --- a/lib/iovec.c +++ /dev/null @@ -1,112 +0,0 @@ -#include <linux/uaccess.h> -#include <linux/export.h> -#include <linux/uio.h> - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, len, iov->iov_len); - if (copy_from_user(kdata, iov->iov_base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovec); - -/* - * Copy kernel to iovec. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, iov->iov_len, len); - if (copy_to_user(iov->iov_base, kdata, copy)) - return -EFAULT; - kdata += copy; - len -= copy; - iov->iov_len -= copy; - iov->iov_base += copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovec); - -/* - * Copy kernel to iovec. Returns -EFAULT on error. - */ - -int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, - int offset, int len) -{ - int copy; - for (; len > 0; ++iov) { - /* Skip over the finished iovecs */ - if (unlikely(offset >= iov->iov_len)) { - offset -= iov->iov_len; - continue; - } - copy = min_t(unsigned int, iov->iov_len - offset, len); - if (copy_to_user(iov->iov_base + offset, kdata, copy)) - return -EFAULT; - offset = 0; - kdata += copy; - len -= copy; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovecend); - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - */ - -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len) -{ - /* No data? Done! */ - if (len == 0) - return 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - if (copy_from_user(kdata, base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovecend); diff --git a/lib/kobject.c b/lib/kobject.c index 58751bb80a7c..3b841b97fccd 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -576,8 +576,13 @@ void kobject_del(struct kobject *kobj) */ struct kobject *kobject_get(struct kobject *kobj) { - if (kobj) + if (kobj) { + if (!kobj->state_initialized) + WARN(1, KERN_WARNING "kobject: '%s' (%p): is not " + "initialized, yet kobject_get() is being " + "called.\n", kobject_name(kobj), kobj); kref_get(&kobj->kref); + } return kobj; } @@ -976,7 +981,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) { const struct kobj_ns_type_operations *ops = NULL; - if (parent && parent->ktype->child_ns_type) + if (parent && parent->ktype && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9ebf9e20de53..f6c2c1e7779c 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -20,7 +20,6 @@ #include <linux/export.h> #include <linux/kmod.h> #include <linux/slab.h> -#include <linux/user_namespace.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> diff --git a/lib/lcm.c b/lib/lcm.c index b9c8de461e9e..03d7fcb420b5 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -1,4 +1,4 @@ -#include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/gcd.h> #include <linux/export.h> #include <linux/lcm.h> @@ -7,10 +7,19 @@ unsigned long lcm(unsigned long a, unsigned long b) { if (a && b) - return (a * b) / gcd(a, b); - else if (b) - return b; - - return a; + return (a / gcd(a, b)) * b; + else + return 0; } EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? : a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/list_sort.c b/lib/list_sort.c index 12bcba1c8612..b29015102698 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -2,9 +2,11 @@ #define pr_fmt(fmt) "list_sort_test: " fmt #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/string.h> #include <linux/list_sort.h> -#include <linux/slab.h> #include <linux/list.h> #define MAX_LIST_LENGTH_BITS 20 @@ -146,6 +148,7 @@ EXPORT_SYMBOL(list_sort); #ifdef CONFIG_TEST_LIST_SORT +#include <linux/slab.h> #include <linux/random.h> /* diff --git a/lib/llist.c b/lib/llist.c index f76196d07409..0b0e9779d675 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -24,7 +24,6 @@ */ #include <linux/kernel.h> #include <linux/export.h> -#include <linux/interrupt.h> #include <linux/llist.h> diff --git a/lib/lockref.c b/lib/lockref.c index d2233de9a86e..494994bf17c8 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -18,7 +18,7 @@ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ - old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ @@ -60,7 +60,7 @@ void lockref_get(struct lockref *lockref) EXPORT_SYMBOL(lockref_get); /** - * lockref_get_not_zero - Increments count unless the count is 0 + * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ @@ -70,7 +70,7 @@ int lockref_get_not_zero(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) return 0; , return 1; @@ -78,7 +78,7 @@ int lockref_get_not_zero(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if (lockref->count) { + if (lockref->count > 0) { lockref->count++; retval = 1; } @@ -88,7 +88,7 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** - * lockref_get_or_lock - Increments count unless the count is 0 + * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero * and we got the lock instead. @@ -97,14 +97,14 @@ int lockref_get_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) break; , return 1; ); spin_lock(&lockref->lock); - if (!lockref->count) + if (lockref->count <= 0) return 0; lockref->count++; spin_unlock(&lockref->lock); @@ -113,6 +113,26 @@ int lockref_get_or_lock(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_or_lock); /** + * lockref_put_return - Decrement reference count if possible + * @lockref: pointer to lockref structure + * + * Decrement the reference count and return the new value. + * If the lockref was dead or locked, return an error. + */ +int lockref_put_return(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 0) + return -1; + , + return new.count; + ); + return -1; +} +EXPORT_SYMBOL(lockref_put_return); + +/** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken @@ -158,7 +178,7 @@ int lockref_get_not_dead(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if ((int)old.count < 0) + if (old.count < 0) return 0; , return 1; @@ -166,7 +186,7 @@ int lockref_get_not_dead(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if ((int) lockref->count >= 0) { + if (lockref->count >= 0) { lockref->count++; retval = 1; } diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 852c81e3ba9a..028f5d996eef 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -247,10 +247,11 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) * progress) and "changed", when this in fact lead to an successful * update of the cache. */ - return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", - lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); + seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); + + return 0; } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 7a85967060a5..26cc6029b280 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -47,6 +47,11 @@ #include "lz4defs.h" +static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 +static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + static int lz4_uncompress(const char *source, char *dest, int osize) { const BYTE *ip = (const BYTE *) source; @@ -56,10 +61,6 @@ static int lz4_uncompress(const char *source, char *dest, int osize) BYTE *cpy; unsigned token; size_t length; - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif while (1) { @@ -116,7 +117,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* copy repeated sequence */ if (unlikely((op - ref) < STEPSIZE)) { #if LZ4_ARCH64 - size_t dec64 = dec64table[op - ref]; + int dec64 = dec64table[op - ref]; #else const int dec64 = 0; #endif @@ -139,6 +140,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* Error: request to write beyond destination buffer */ if (cpy > oend) goto _output_error; + if ((ref + COPYLENGTH) > oend || + (op + COPYLENGTH) > oend) + goto _output_error; LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); while (op < cpy) *op++ = *ref++; @@ -174,11 +178,6 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, BYTE * const oend = op + maxoutputsize; BYTE *cpy; - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif - /* Main Loop */ while (ip < iend) { @@ -246,7 +245,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, /* copy repeated sequence */ if (unlikely((op - ref) < STEPSIZE)) { #if LZ4_ARCH64 - size_t dec64 = dec64table[op - ref]; + int dec64 = dec64table[op - ref]; #else const int dec64 = 0; #endif diff --git a/lib/md5.c b/lib/md5.c index 958a3c15923c..bb0cd01d356d 100644 --- a/lib/md5.c +++ b/lib/md5.c @@ -1,4 +1,4 @@ -#include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/export.h> #include <linux/cryptohash.h> diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index aac511417ad1..a89d041592c8 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -639,7 +639,7 @@ do { \ ************** MIPS ***************** ***************************************/ #if defined(__mips__) && W_TYPE_SIZE == 32 -#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 +#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ UDItype __ll = (UDItype)(u) * (v); \ @@ -671,7 +671,7 @@ do { \ ************** MIPS/64 ************** ***************************************/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 +#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c index 1871e7b61ca0..d25e9e96c310 100644 --- a/lib/mpi/mpi-cmp.c +++ b/lib/mpi/mpi-cmp.c @@ -57,14 +57,12 @@ int mpi_cmp(MPI u, MPI v) if (usize != vsize && !u->sign && !v->sign) return usize - vsize; if (usize != vsize && u->sign && v->sign) - return vsize + usize; + return vsize - usize; if (!usize) return 0; cmp = mpihelp_cmp(u->d, v->d, usize); - if (!cmp) - return 0; - if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0)) - return 1; - return -1; + if (u->sign) + return -cmp; + return cmp; } EXPORT_SYMBOL_GPL(mpi_cmp); diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 60cf765628e9..c65dd1bff45a 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -84,7 +84,7 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) do { \ mpi_size_t _i; \ for (_i = 0; _i < (n); _i++) \ - (d)[_i] = (d)[_i]; \ + (d)[_i] = (s)[_i]; \ } while (0) #define MPN_COPY_DECR(d, s, n) \ diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 4cc6442733f4..bc0a1da8afba 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -128,28 +128,36 @@ leave: } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); -/**************** - * Return an allocated buffer with the MPI (msb first). - * NBYTES receives the length of this buffer. Caller must free the - * return string (This function does return a 0 byte buffer with NBYTES - * set to zero if the value of A is zero. If sign is not NULL, it will - * be set to the sign of the A. +/** + * mpi_read_buffer() - read MPI to a bufer provided by user (msb first) + * + * @a: a multi precision integer + * @buf: bufer to which the output will be written to. Needs to be at + * leaset mpi_get_size(a) long. + * @buf_len: size of the buf. + * @nbytes: receives the actual length of the data written. + * @sign: if not NULL, it will be set to the sign of a. + * + * Return: 0 on success or error code in case of error */ -void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) +int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, + int *sign) { - uint8_t *p, *buffer; + uint8_t *p; mpi_limb_t alimb; + unsigned int n = mpi_get_size(a); int i; - unsigned int n; + + if (buf_len < n || !buf) + return -EINVAL; if (sign) *sign = a->sign; - *nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB; - if (!n) - n++; /* avoid zero length allocation */ - p = buffer = kmalloc(n, GFP_KERNEL); - if (!p) - return NULL; + + if (nbytes) + *nbytes = n; + + p = buf; for (i = a->nlimbs - 1; i >= 0; i--) { alimb = a->d[i]; @@ -171,15 +179,56 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) #error please implement for this limb size. #endif } + return 0; +} +EXPORT_SYMBOL_GPL(mpi_read_buffer); + +/* + * mpi_get_buffer() - Returns an allocated buffer with the MPI (msb first). + * Caller must free the return string. + * This function does return a 0 byte buffer with nbytes set to zero if the + * value of A is zero. + * + * @a: a multi precision integer. + * @nbytes: receives the length of this buffer. + * @sign: if not NULL, it will be set to the sign of the a. + * + * Return: Pointer to MPI buffer or NULL on error + */ +void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) +{ + uint8_t *buf, *p; + unsigned int n; + int ret; + + if (!nbytes) + return NULL; + + n = mpi_get_size(a); + + if (!n) + n++; + + buf = kmalloc(n, GFP_KERNEL); + + if (!buf) + return NULL; + + ret = mpi_read_buffer(a, buf, n, nbytes, sign); + + if (ret) { + kfree(buf); + return NULL; + } /* this is sub-optimal but we need to do the shift operation * because the caller has to free the returned buffer */ - for (p = buffer; !*p && *nbytes; p++, --*nbytes) + for (p = buf; !*p && *nbytes; p++, --*nbytes) ; - if (p != buffer) - memmove(buffer, p, *nbytes); + if (p != buf) + memmove(buf, p, *nbytes); - return buffer; + return buf; } EXPORT_SYMBOL_GPL(mpi_get_buffer); diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index bf076d281d40..314f4dfa603e 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -69,7 +69,7 @@ void mpi_free_limb_space(mpi_ptr_t a) if (!a) return; - kfree(a); + kzfree(a); } void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) @@ -95,7 +95,7 @@ int mpi_resize(MPI a, unsigned nlimbs) if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); - kfree(a->d); + kzfree(a->d); a->d = p; } else { a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); @@ -112,7 +112,7 @@ void mpi_free(MPI a) return; if (a->flags & 4) - kfree(a->d); + kzfree(a->d); else mpi_free_limb_space(a->d); diff --git a/lib/nlattr.c b/lib/nlattr.c index 9c3e85ff0a6c..f5907d23272d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> @@ -280,6 +279,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); + if (count > minlen) + memset(dest + minlen, 0, count - minlen); return minlen; } diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index 0d83ea8a9605..bcce5f149310 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -10,10 +10,11 @@ #ifdef CONFIG_PCI /** - * pci_iomap - create a virtual mapping cookie for a PCI BAR + * pci_iomap_range - create a virtual mapping cookie for a PCI BAR * @dev: PCI device that owns the BAR * @bar: BAR number - * @maxlen: length of the memory to map + * @offset: map memory at the given offset in BAR + * @maxlen: max length of the memory to map * * Using this function you will get a __iomem address to your device BAR. * You can access it using ioread*() and iowrite*(). These functions hide @@ -21,16 +22,21 @@ * you expect from them in the correct way. * * @maxlen specifies the maximum length to map. If you want to get access to - * the complete BAR without checking for its length first, pass %0 here. + * the complete BAR from offset to the end, pass %0 here. * */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +void __iomem *pci_iomap_range(struct pci_dev *dev, + int bar, + unsigned long offset, + unsigned long maxlen) { resource_size_t start = pci_resource_start(dev, bar); resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); - if (!len || !start) + if (len <= offset || !start) return NULL; + len -= offset; + start += offset; if (maxlen && len > maxlen) len = maxlen; if (flags & IORESOURCE_IO) @@ -43,6 +49,25 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) /* What? */ return NULL; } +EXPORT_SYMBOL(pci_iomap_range); +/** + * pci_iomap - create a virtual mapping cookie for a PCI BAR + * @dev: PCI device that owns the BAR + * @bar: BAR number + * @maxlen: length of the memory to map + * + * Using this function you will get a __iomem address to your device BAR. + * You can access it using ioread*() and iowrite*(). These functions hide + * the details if this is a MMIO or PIO address space and will just do what + * you expect from them in the correct way. + * + * @maxlen specifies the maximum length to map. If you want to get access to + * the complete BAR without checking for its length first, pass %0 here. + * */ +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + return pci_iomap_range(dev, bar, 0, maxlen); +} EXPORT_SYMBOL(pci_iomap); #endif /* CONFIG_PCI */ diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 48144cdae819..f051d69f0910 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else @@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) else return 0; } -EXPORT_SYMBOL(percpu_counter_compare); +EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 93d145e5539c..f75715131f20 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -19,13 +19,10 @@ #include <linux/bug.h> #include <linux/err.h> #include <linux/export.h> -#include <linux/hardirq.h> -#include <linux/idr.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/string.h> #include <linux/spinlock.h> #include <linux/percpu_ida.h> diff --git a/lib/plist.c b/lib/plist.c index d408e774b746..3a30c53db061 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -25,7 +25,6 @@ #include <linux/bug.h> #include <linux/plist.h> -#include <linux/spinlock.h> #ifdef CONFIG_DEBUG_PI_LIST diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3291a8e37490..061550de77bc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -33,7 +33,7 @@ #include <linux/string.h> #include <linux/bitops.h> #include <linux/rcupdate.h> -#include <linux/hardirq.h> /* in_interrupt() */ +#include <linux/preempt.h> /* in_interrupt() */ /* diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 7d0e5cd7b570..975c6e0434bd 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -89,10 +89,10 @@ void (*raid6_datap_recov)(int, size_t, int, void **); EXPORT_SYMBOL_GPL(raid6_datap_recov); const struct raid6_recov_calls *const raid6_recov_algos[] = { -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) #ifdef CONFIG_AS_AVX2 &raid6_recov_avx2, #endif +#ifdef CONFIG_AS_SSSE3 &raid6_recov_ssse3, #endif &raid6_recov_intx1, @@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) { - unsigned long perf, bestperf, j0, j1; + unsigned long perf, bestgenperf, bestxorperf, j0, j1; + int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->prefer >= best->prefer) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen( } preempt_enable(); - if (perf > bestperf) { - bestperf = perf; + if (perf > bestgenperf) { + bestgenperf = perf; best = *algo; } - pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, + pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + + if (!(*algo)->xor_syndrome) + continue; + + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + (*algo)->xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; + } + preempt_enable(); + + if (best == *algo) + bestxorperf = perf; + + pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); } } if (best) { - pr_info("raid6: using algorithm %s (%ld MB/s)\n", + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", best->name, - (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + if (best->xor_syndrome) + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); raid6_call = *best; } else pr_err("raid6: Yikes! No algorithm found!\n"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 7cc12b532e95..bec27fce7501 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -119,6 +119,7 @@ int raid6_have_altivec(void) const struct raid6_calls raid6_altivec$# = { raid6_altivec$#_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_altivec, "altivecx$#", 0 diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index bc3b1dd436eb..76734004358d 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c @@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x1 = { raid6_avx21_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x1", 1 /* Has cache hints */ @@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x2 = { raid6_avx22_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x2", 1 /* Has cache hints */ @@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x4 = { raid6_avx24_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 5b50f8dfc5d2..558aeac9342a 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) } } +static void raid6_int$#_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + /* P/Q data pages */ + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= start ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + wq$$ = w1$$ ^ w2$$; + } + *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + } + +} + const struct raid6_calls raid6_intx$# = { raid6_int$#_gen_syndrome, - NULL, /* always valid */ + raid6_int$#_xor_syndrome, + NULL, /* always valid */ "int" NSTRING "x$#", 0 }; diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 590c71c9e200..b3b0e1fcd3af 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c @@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx1 = { raid6_mmx1_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx1", 0 @@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx2 = { raid6_mmx2_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx2", 0 diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 36ad4705df1a..d9ad6ee284f4 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -42,6 +42,7 @@ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ + NULL, /* XOR not yet implemented */ \ raid6_have_neon, \ "neonx" #_n, \ 0 \ diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c index e1eea433a493..53fe3d7bdfb3 100644 --- a/lib/raid6/recov_avx2.c +++ b/lib/raid6/recov_avx2.c @@ -8,7 +8,7 @@ * of the License. */ -#if CONFIG_AS_AVX2 +#ifdef CONFIG_AS_AVX2 #include <linux/raid/pq.h> #include "x86.h" diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index a9168328f03b..cda33e56a5e3 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -7,6 +7,8 @@ * of the License. */ +#ifdef CONFIG_AS_SSSE3 + #include <linux/raid/pq.h> #include "x86.h" @@ -330,3 +332,7 @@ const struct raid6_recov_calls raid6_recov_ssse3 = { #endif .priority = 1, }; + +#else +#warning "your version of binutils lacks SSSE3 support" +#endif diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index f76297139445..9025b8ca9aa3 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c @@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x1 = { raid6_sse11_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x1", 1 /* Has cache hints */ @@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x2 = { raid6_sse12_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x2", 1 /* Has cache hints */ diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 85b82c85f28e..1d2276b007ee 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + +static void raid6_sse21_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("pxor %xmm4,%xmm2"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm5,%xmm4"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + const struct raid6_calls raid6_sse2x1 = { raid6_sse21_gen_syndrome, + raid6_sse21_xor_syndrome, raid6_have_sse2, "sse2x1", 1 /* Has cache hints */ @@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse22_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, + raid6_sse22_xor_syndrome, raid6_have_sse2, "sse2x2", 1 /* Has cache hints */ @@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse24_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 64 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32])); + asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32])); + asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + asm volatile("pxor %xmm12,%xmm10"); + asm volatile("pxor %xmm14,%xmm11"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("prefetchnta %0" :: "m" (q[d])); + asm volatile("prefetchnta %0" :: "m" (q[d+32])); + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32])); + asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + } + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + + const struct raid6_calls raid6_sse2x4 = { raid6_sse24_gen_syndrome, + raid6_sse24_xor_syndrome, raid6_have_sse2, "sse2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 5a485b7a7d3c..3bebbabdb510 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -28,11 +28,11 @@ char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE]; char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; -static void makedata(void) +static void makedata(int start, int stop) { int i, j; - for (i = 0; i < NDISKS; i++) { + for (i = start; i <= stop; i++) { for (j = 0; j < PAGE_SIZE; j++) data[i][j] = rand(); @@ -91,34 +91,55 @@ int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; const struct raid6_recov_calls *const *ra; - int i, j; + int i, j, p1, p2; int err = 0; - makedata(); + makedata(0, NDISKS-1); for (ra = raid6_recov_algos; *ra; ra++) { if ((*ra)->valid && !(*ra)->valid()) continue; + raid6_2data_recov = (*ra)->data2; raid6_datap_recov = (*ra)->datap; printf("using recovery %s\n", (*ra)->name); for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + if ((*algo)->valid && !(*algo)->valid()) + continue; + + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + + if (!raid6_call.xor_syndrome) + continue; + + for (p1 = 0; p1 < NDISKS-2; p1++) + for (p2 = p1; p2 < NDISKS-2; p2++) { - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + /* Simulate rmw run */ + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); + makedata(p1, p2); + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); - /* Generate assumed good syndrome */ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); - } } printf("\n"); } diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc index e7c29459cbcd..2dd291a11264 100644 --- a/lib/raid6/tilegx.uc +++ b/lib/raid6/tilegx.uc @@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_tilegx$# = { raid6_tilegx$#_gen_syndrome, + NULL, /* XOR not yet implemented */ NULL, "tilegx$#", 0 diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index b7595484a815..8fe9d9662abb 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -23,7 +23,7 @@ #ifdef __KERNEL__ /* Real code */ -#include <asm/i387.h> +#include <asm/fpu/api.h> #else /* Dummy code for user space testing */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 624a0b7c05ef..8609378e6505 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1,514 +1,675 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> + * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * - * Based on the following paper: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/atomic.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/log2.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/hash.h> +#include <linux/jhash.h> #include <linux/random.h> #include <linux/rhashtable.h> +#include <linux/err.h> +#include <linux/export.h> #define HASH_DEFAULT_SIZE 64UL -#define HASH_MIN_SIZE 4UL - -#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) +#define HASH_MIN_SIZE 4U +#define BUCKET_LOCKS_PER_CPU 128UL -#ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +static u32 head_hashfn(struct rhashtable *ht, + const struct bucket_table *tbl, + const struct rhash_head *he) { - return ht->p.mutex_is_held(); + return rht_head_hashfn(ht, tbl, he, ht->p); } -EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); -#endif -static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) -{ - return (void *) he - ht->p.head_offset; -} +#ifdef CONFIG_PROVE_LOCKING +#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) -static u32 __hashfn(const struct rhashtable *ht, const void *key, - u32 len, u32 hsize) +int lockdep_rht_mutex_is_held(struct rhashtable *ht) { - u32 h; - - h = ht->p.hashfn(key, len, ht->p.hash_rnd); - - return h & (hsize - 1); + return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; } +EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); -/** - * rhashtable_hashfn - compute hash for key of given length - * @ht: hash table to compute for - * @key: pointer to key - * @len: length of key - * - * Computes the hash value using the hash function provided in the 'hashfn' - * of struct rhashtable_params. The returned value is guaranteed to be - * smaller than the number of buckets in the hash table. - */ -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + spinlock_t *lock = rht_bucket_lock(tbl, hash); - return __hashfn(ht, key, len, tbl->size); + return (debug_locks) ? lockdep_is_held(lock) : 1; } -EXPORT_SYMBOL_GPL(rhashtable_hashfn); +EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); +#else +#define ASSERT_RHT_MUTEX(HT) +#endif + -static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, + gfp_t gfp) { - if (unlikely(!ht->p.key_len)) { - u32 h; + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); - h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + /* Never allocate more than 0.5 locks per bucket */ + size = min_t(unsigned int, size, tbl->size >> 1); - return h & (hsize - 1); + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE && + gfp == GFP_KERNEL) + tbl->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), + gfp); + if (!tbl->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tbl->locks[i]); } + tbl->locks_mask = size - 1; - return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); + return 0; } -/** - * rhashtable_obj_hashfn - compute hash for hashed object - * @ht: hash table to compute for - * @ptr: pointer to hashed object - * - * Computes the hash value using the hash function `hashfn` respectively - * 'obj_hashfn' depending on whether the hash table is set up to work with - * a fixed length key. The returned value is guaranteed to be smaller than - * the number of buckets in the hash table. - */ -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +static void bucket_table_free(const struct bucket_table *tbl) { - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + if (tbl) + kvfree(tbl->locks); - return obj_hashfn(ht, ptr, tbl->size); + kvfree(tbl); } -EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); -static u32 head_hashfn(const struct rhashtable *ht, - const struct rhash_head *he, u32 hsize) +static void bucket_table_free_rcu(struct rcu_head *head) { - return obj_hashfn(ht, rht_obj(ht, he), hsize); + bucket_table_free(container_of(head, struct bucket_table, rcu)); } -static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags) +static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, + size_t nbuckets, + gfp_t gfp) { - struct bucket_table *tbl; + struct bucket_table *tbl = NULL; size_t size; + int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, flags); - if (tbl == NULL) + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || + gfp != GFP_KERNEL) + tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (tbl == NULL && gfp == GFP_KERNEL) tbl = vzalloc(size); - if (tbl == NULL) return NULL; tbl->size = nbuckets; + if (alloc_bucket_locks(ht, tbl, gfp) < 0) { + bucket_table_free(tbl); + return NULL; + } + + INIT_LIST_HEAD(&tbl->walkers); + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + + for (i = 0; i < nbuckets; i++) + INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); + return tbl; } -static void bucket_table_free(const struct bucket_table *tbl) +static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, + struct bucket_table *tbl) { - kvfree(tbl); + struct bucket_table *new_tbl; + + do { + new_tbl = tbl; + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } while (tbl); + + return new_tbl; } -/** - * rht_grow_above_75 - returns true if nelems > 0.75 * table-size - * @ht: hash table - * @new_size: new table size - */ -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) { - /* Expand table when exceeding 75% load */ - return ht->nelems > (new_size / 4 * 3); + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl = rhashtable_last_table(ht, + rht_dereference_rcu(old_tbl->future_tbl, ht)); + struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; + int err = -ENOENT; + struct rhash_head *head, *next, *entry; + spinlock_t *new_bucket_lock; + unsigned int new_hash; + + rht_for_each(entry, old_tbl, old_hash) { + err = 0; + next = rht_dereference_bucket(entry->next, old_tbl, old_hash); + + if (rht_is_a_nulls(next)) + break; + + pprev = &entry->next; + } + + if (err) + goto out; + + new_hash = head_hashfn(ht, new_tbl, entry); + + new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); + + spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); + head = rht_dereference_bucket(new_tbl->buckets[new_hash], + new_tbl, new_hash); + + if (rht_is_a_nulls(head)) + INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash); + else + RCU_INIT_POINTER(entry->next, head); + + rcu_assign_pointer(new_tbl->buckets[new_hash], entry); + spin_unlock(new_bucket_lock); + + rcu_assign_pointer(*pprev, next); + +out: + return err; } -EXPORT_SYMBOL_GPL(rht_grow_above_75); -/** - * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size - * @ht: hash table - * @new_size: new table size - */ -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +static void rhashtable_rehash_chain(struct rhashtable *ht, + unsigned int old_hash) { - /* Shrink table beneath 30% load */ - return ht->nelems < (new_size * 3 / 10); + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + spinlock_t *old_bucket_lock; + + old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); + + spin_lock_bh(old_bucket_lock); + while (!rhashtable_rehash_one(ht, old_hash)) + ; + old_tbl->rehash++; + spin_unlock_bh(old_bucket_lock); } -EXPORT_SYMBOL_GPL(rht_shrink_below_30); -static void hashtable_chain_unzip(const struct rhashtable *ht, - const struct bucket_table *new_tbl, - struct bucket_table *old_tbl, size_t n) +static int rhashtable_rehash_attach(struct rhashtable *ht, + struct bucket_table *old_tbl, + struct bucket_table *new_tbl) { - struct rhash_head *he, *p, *next; - unsigned int h; - - /* Old bucket empty, no work needed. */ - p = rht_dereference(old_tbl->buckets[n], ht); - if (!p) - return; + /* Protect future_tbl using the first bucket lock. */ + spin_lock_bh(old_tbl->locks); - /* Advance the old bucket pointer one or more times until it - * reaches a node that doesn't hash to the same bucket as the - * previous node p. Call the previous node p; - */ - h = head_hashfn(ht, p, new_tbl->size); - rht_for_each(he, p->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) != h) - break; - p = he; + /* Did somebody beat us to it? */ + if (rcu_access_pointer(old_tbl->future_tbl)) { + spin_unlock_bh(old_tbl->locks); + return -EEXIST; } - RCU_INIT_POINTER(old_tbl->buckets[n], p->next); - /* Find the subsequent node which does hash to the same - * bucket as node P, or NULL if no such node exists. + /* Make insertions go into the new, empty table right away. Deletions + * and lookups will be attempted in both tables until we synchronize. */ - next = NULL; - if (he) { - rht_for_each(he, he->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) == h) { - next = he; - break; - } - } - } + rcu_assign_pointer(old_tbl->future_tbl, new_tbl); + + /* Ensure the new table is visible to readers. */ + smp_wmb(); + + spin_unlock_bh(old_tbl->locks); + + return 0; +} - /* Set p's next pointer to that subsequent node pointer, - * bypassing the nodes which do not hash to p's bucket +static int rhashtable_rehash_table(struct rhashtable *ht) +{ + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl; + struct rhashtable_walker *walker; + unsigned int old_hash; + + new_tbl = rht_dereference(old_tbl->future_tbl, ht); + if (!new_tbl) + return 0; + + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) + rhashtable_rehash_chain(ht, old_hash); + + /* Publish the new table pointer. */ + rcu_assign_pointer(ht->tbl, new_tbl); + + spin_lock(&ht->lock); + list_for_each_entry(walker, &old_tbl->walkers, list) + walker->tbl = NULL; + spin_unlock(&ht->lock); + + /* Wait for readers. All new readers will see the new + * table, and thus no references to the old table will + * remain. */ - RCU_INIT_POINTER(p->next, next); + call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + + return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } /** * rhashtable_expand - Expand hash table while allowing concurrent lookups * @ht: the hash table to expand - * @flags: allocation flags * - * A secondary bucket array is allocated and the hash entries are migrated - * while keeping them on both lists until the end of the RCU grace period. + * A secondary bucket array is allocated and the hash entries are migrated. * * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. * - * The caller must ensure that no concurrent table mutations take place. - * It is however valid to have concurrent lookups if they are RCU protected. + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_expand(struct rhashtable *ht, gfp_t flags) +static int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); - struct rhash_head *he; - unsigned int i, h; - bool complete; + int err; ASSERT_RHT_MUTEX(ht); - if (ht->p.max_shift && ht->shift >= ht->p.max_shift) - return 0; + old_tbl = rhashtable_last_table(ht, old_tbl); - new_tbl = bucket_table_alloc(old_tbl->size * 2, flags); + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; - ht->shift++; - - /* For each new bucket, search the corresponding old bucket - * for the first entry that hashes to the new bucket, and - * link the new bucket to that entry. Since all the entries - * which will end up in the new bucket appear in the same - * old bucket, this constructs an entirely valid new hash - * table, but with multiple buckets "zipped" together into a - * single imprecise chain. - */ - for (i = 0; i < new_tbl->size; i++) { - h = i & (old_tbl->size - 1); - rht_for_each(he, old_tbl->buckets[h], ht) { - if (head_hashfn(ht, he, new_tbl->size) == i) { - RCU_INIT_POINTER(new_tbl->buckets[i], he); - break; - } - } - } - - /* Publish the new table pointer. Lookups may now traverse - * the new table, but they will not benefit from any - * additional efficiency until later steps unzip the buckets. - */ - rcu_assign_pointer(ht->tbl, new_tbl); + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); - /* Unzip interleaved hash chains */ - do { - /* Wait for readers. All new readers will see the new - * table, and thus no references to the old table will - * remain. - */ - synchronize_rcu(); - - /* For each bucket in the old table (each of which - * contains items from multiple buckets of the new - * table): ... - */ - complete = true; - for (i = 0; i < old_tbl->size; i++) { - hashtable_chain_unzip(ht, new_tbl, old_tbl, i); - if (old_tbl->buckets[i] != NULL) - complete = false; - } - } while (!complete); - - bucket_table_free(old_tbl); - return 0; + return err; } -EXPORT_SYMBOL_GPL(rhashtable_expand); /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups * @ht: the hash table to shrink - * @flags: allocation flags * - * This function may only be called in a context where it is safe to call - * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * This function shrinks the hash table to fit, i.e., the smallest + * size would not cause it to expand right away automatically. + * + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. * * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) +static int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); - struct rhash_head __rcu **pprev; - unsigned int i; + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + unsigned int size; + int err; ASSERT_RHT_MUTEX(ht); - if (ht->shift <= ht->p.min_shift) + size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + if (size < ht->p.min_size) + size = ht->p.min_size; + + if (old_tbl->size <= size) return 0; - ntbl = bucket_table_alloc(tbl->size / 2, flags); - if (ntbl == NULL) + if (rht_dereference(old_tbl->future_tbl, ht)) + return -EEXIST; + + new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); + if (new_tbl == NULL) return -ENOMEM; - ht->shift--; + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); - /* Link each bucket in the new table to the first bucket - * in the old table that contains entries which will hash - * to the new bucket. - */ - for (i = 0; i < ntbl->size; i++) { - ntbl->buckets[i] = tbl->buckets[i]; + return err; +} + +static void rht_deferred_worker(struct work_struct *work) +{ + struct rhashtable *ht; + struct bucket_table *tbl; + int err = 0; + + ht = container_of(work, struct rhashtable, run_work); + mutex_lock(&ht->mutex); + + tbl = rht_dereference(ht->tbl, ht); + tbl = rhashtable_last_table(ht, tbl); + + if (rht_grow_above_75(ht, tbl)) + rhashtable_expand(ht); + else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) + rhashtable_shrink(ht); + + err = rhashtable_rehash_table(ht); + + mutex_unlock(&ht->mutex); - /* Link each bucket in the new table to the first bucket - * in the old table that contains entries which will hash - * to the new bucket. + if (err) + schedule_work(&ht->run_work); +} + +static bool rhashtable_check_elasticity(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) +{ + unsigned int elasticity = ht->elasticity; + struct rhash_head *head; + + rht_for_each(head, tbl, hash) + if (!--elasticity) + return true; + + return false; +} + +int rhashtable_insert_rehash(struct rhashtable *ht) +{ + struct bucket_table *old_tbl; + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int size; + int err; + + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rhashtable_last_table(ht, old_tbl); + + size = tbl->size; + + if (rht_grow_above_75(ht, tbl)) + size *= 2; + /* Do not schedule more than one rehash */ + else if (old_tbl != tbl) + return -EBUSY; + + new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); + if (new_tbl == NULL) { + /* Schedule async resize/rehash to try allocation + * non-atomic context. */ - for (pprev = &ntbl->buckets[i]; *pprev != NULL; - pprev = &rht_dereference(*pprev, ht)->next) - ; - RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); + schedule_work(&ht->run_work); + return -ENOMEM; } - /* Publish the new, valid hash table */ - rcu_assign_pointer(ht->tbl, ntbl); + err = rhashtable_rehash_attach(ht, tbl, new_tbl); + if (err) { + bucket_table_free(new_tbl); + if (err == -EEXIST) + err = 0; + } else + schedule_work(&ht->run_work); - /* Wait for readers. No new readers will have references to the - * old hash table. - */ - synchronize_rcu(); + return err; +} +EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); - bucket_table_free(tbl); +int rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj, + struct bucket_table *tbl) +{ + struct rhash_head *head; + unsigned int hash; + int err; - return 0; + tbl = rhashtable_last_table(ht, tbl); + hash = head_hashfn(ht, tbl, obj); + spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + + err = -EEXIST; + if (key && rhashtable_lookup_fast(ht, key, ht->p)) + goto exit; + + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto exit; + + err = -EAGAIN; + if (rhashtable_check_elasticity(ht, tbl, hash) || + rht_grow_above_100(ht, tbl)) + goto exit; + + err = 0; + + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + +exit: + spin_unlock(rht_bucket_lock(tbl, hash)); + + return err; } -EXPORT_SYMBOL_GPL(rhashtable_shrink); +EXPORT_SYMBOL_GPL(rhashtable_insert_slow); /** - * rhashtable_insert - insert object into hash hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @flags: allocation flags (table expansion) + * rhashtable_walk_init - Initialise an iterator + * @ht: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. * - * Will automatically grow the table via rhashtable_expand() if the the - * grow_decision function specified at rhashtable_init() returns true. + * For a completely stable walk you should construct your own data + * structure outside the hash table. * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit if this function returns + * successfully. */ -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, - gfp_t flags) +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - u32 hash; + iter->ht = ht; + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; - ASSERT_RHT_MUTEX(ht); + iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL); + if (!iter->walker) + return -ENOMEM; - hash = head_hashfn(ht, obj, tbl->size); - RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); - rcu_assign_pointer(tbl->buckets[hash], obj); - ht->nelems++; + mutex_lock(&ht->mutex); + iter->walker->tbl = rht_dereference(ht->tbl, ht); + list_add(&iter->walker->list, &iter->walker->tbl->walkers); + mutex_unlock(&ht->mutex); - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) - rhashtable_expand(ht, flags); + return 0; } -EXPORT_SYMBOL_GPL(rhashtable_insert); +EXPORT_SYMBOL_GPL(rhashtable_walk_init); /** - * rhashtable_remove_pprev - remove object from hash table given previous element - * @ht: hash table - * @obj: pointer to hash head inside object - * @pprev: pointer to previous element - * @flags: allocation flags (table expansion) - * - * Identical to rhashtable_remove() but caller is alreayd aware of the element - * in front of the element to be deleted. This is in particular useful for - * deletion when combined with walking or lookup. + * rhashtable_walk_exit - Free an iterator + * @iter: Hash table Iterator + * + * This function frees resources allocated by rhashtable_walk_init. */ -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev, gfp_t flags) +void rhashtable_walk_exit(struct rhashtable_iter *iter) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - - ASSERT_RHT_MUTEX(ht); - - RCU_INIT_POINTER(*pprev, obj->next); - ht->nelems--; - - if (ht->p.shrink_decision && - ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht, flags); + mutex_lock(&iter->ht->mutex); + if (iter->walker->tbl) + list_del(&iter->walker->list); + mutex_unlock(&iter->ht->mutex); + kfree(iter->walker); } -EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); +EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** - * rhashtable_remove - remove object from hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @flags: allocation flags (table expansion) + * rhashtable_walk_start - Start a hash table walk + * @iter: Hash table iterator * - * Since the hash chain is single linked, the removal operation needs to - * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. + * Start a hash table walk. Note that we take the RCU lock in all + * cases including when we return an error. So you must always call + * rhashtable_walk_stop to clean up. * - * Will automatically shrink the table via rhashtable_expand() if the the - * shrink_decision function specified at rhashtable_init() returns true. + * Returns zero if successful. * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may use it immediately + * by calling rhashtable_walk_next. */ -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj, - gfp_t flags) +int rhashtable_walk_start(struct rhashtable_iter *iter) + __acquires(RCU) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - struct rhash_head __rcu **pprev; - struct rhash_head *he; - u32 h; + struct rhashtable *ht = iter->ht; - ASSERT_RHT_MUTEX(ht); + mutex_lock(&ht->mutex); - h = head_hashfn(ht, obj, tbl->size); + if (iter->walker->tbl) + list_del(&iter->walker->list); - pprev = &tbl->buckets[h]; - rht_for_each(he, tbl->buckets[h], ht) { - if (he != obj) { - pprev = &he->next; - continue; - } + rcu_read_lock(); + + mutex_unlock(&ht->mutex); - rhashtable_remove_pprev(ht, he, pprev, flags); - return true; + if (!iter->walker->tbl) { + iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); + return -EAGAIN; } - return false; + return 0; } -EXPORT_SYMBOL_GPL(rhashtable_remove); +EXPORT_SYMBOL_GPL(rhashtable_walk_start); /** - * rhashtable_lookup - lookup key in hash table - * @ht: hash table - * @key: pointer to key + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. * - * This lookup function may only be used for fixed key hash table (key_len - * paramter set). It will BUG() if used inappropriately. + * Returns the next object or NULL when the end of the table is reached. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may continue to use it. */ -void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +void *rhashtable_walk_next(struct rhashtable_iter *iter) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - struct rhash_head *he; - u32 h; - - BUG_ON(!ht->p.key_len); - - h = __hashfn(ht, key, ht->p.key_len, tbl->size); - rht_for_each_rcu(he, tbl->buckets[h], ht) { - if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, - ht->p.key_len)) - continue; - return (void *) he - ht->p.head_offset; + struct bucket_table *tbl = iter->walker->tbl; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + void *obj = NULL; + + if (p) { + p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + goto next; } - return NULL; + for (; iter->slot < tbl->size; iter->slot++) { + int skip = iter->skip; + + rht_for_each_rcu(p, tbl, iter->slot) { + if (!skip) + break; + skip--; + } + +next: + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + obj = rht_obj(ht, p); + goto out; + } + + iter->skip = 0; + } + + /* Ensure we see any new tables. */ + smp_rmb(); + + iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (iter->walker->tbl) { + iter->slot = 0; + iter->skip = 0; + return ERR_PTR(-EAGAIN); + } + + iter->p = NULL; + +out: + + return obj; } -EXPORT_SYMBOL_GPL(rhashtable_lookup); +EXPORT_SYMBOL_GPL(rhashtable_walk_next); /** - * rhashtable_lookup_compare - search hash table with compare function - * @ht: hash table - * @hash: hash value of desired entry - * @compare: compare function, must return true on match - * @arg: argument passed on to compare function - * - * Traverses the bucket chain behind the provided hash value and calls the - * specified compare function for each entry. - * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * rhashtable_walk_stop - Finish a hash table walk + * @iter: Hash table iterator * - * Returns the first entry on which the compare function returned true. + * Finish a hash table walk. */ -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, - bool (*compare)(void *, void *), void *arg) +void rhashtable_walk_stop(struct rhashtable_iter *iter) + __releases(RCU) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - struct rhash_head *he; + struct rhashtable *ht; + struct bucket_table *tbl = iter->walker->tbl; - if (unlikely(hash >= tbl->size)) - return NULL; + if (!tbl) + goto out; - rht_for_each_rcu(he, tbl->buckets[hash], ht) { - if (!compare(rht_obj(ht, he), arg)) - continue; - return (void *) he - ht->p.head_offset; - } + ht = iter->ht; + + spin_lock(&ht->lock); + if (tbl->rehash < tbl->size) + list_add(&iter->walker->list, &tbl->walkers); + else + iter->walker->tbl = NULL; + spin_unlock(&ht->lock); - return NULL; + iter->p = NULL; + +out: + rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); +EXPORT_SYMBOL_GPL(rhashtable_walk_stop); -static size_t rounded_hashtable_size(struct rhashtable_params *params) +static size_t rounded_hashtable_size(const struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - 1UL << params->min_shift); + (unsigned long)params->min_size); +} + +static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) +{ + return jhash2(key, length, seed); } /** @@ -531,8 +692,8 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .head_offset = offsetof(struct test_obj, node), * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), - * .hashfn = arch_fast_hash, - * .mutex_is_held = &my_mutex_is_held, + * .hashfn = jhash, + * .nulls_base = (1U << RHT_BASE_SHIFT), * }; * * Configuration Example 2: Variable length keys @@ -541,7 +702,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * struct rhash_head node; * }; * - * u32 my_hash_fn(const void *data, u32 seed) + * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * @@ -550,245 +711,139 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * * struct rhashtable_params params = { * .head_offset = offsetof(struct test_obj, node), - * .hashfn = arch_fast_hash, + * .hashfn = jhash, * .obj_hashfn = my_hash_fn, - * .mutex_is_held = &my_mutex_is_held, * }; */ -int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params) { struct bucket_table *tbl; size_t size; size = HASH_DEFAULT_SIZE; - if ((params->key_len && !params->hashfn) || - (!params->key_len && !params->obj_hashfn)) + if ((!params->key_len && !params->obj_hashfn) || + (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; - params->min_shift = max_t(size_t, params->min_shift, - ilog2(HASH_MIN_SIZE)); + if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) + return -EINVAL; if (params->nelem_hint) size = rounded_hashtable_size(params); - tbl = bucket_table_alloc(size, GFP_KERNEL); - if (tbl == NULL) - return -ENOMEM; - memset(ht, 0, sizeof(*ht)); - ht->shift = ilog2(tbl->size); + mutex_init(&ht->mutex); + spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); - RCU_INIT_POINTER(ht->tbl, tbl); - - if (!ht->p.hash_rnd) - get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); - - return 0; -} -EXPORT_SYMBOL_GPL(rhashtable_init); - -/** - * rhashtable_destroy - destroy hash table - * @ht: the hash table to destroy - * - * Frees the bucket array. This function is not rcu safe, therefore the caller - * has to make sure that no resizing may happen by unpublishing the hashtable - * and waiting for the quiescent cycle before releasing the bucket array. - */ -void rhashtable_destroy(const struct rhashtable *ht) -{ - bucket_table_free(ht->tbl); -} -EXPORT_SYMBOL_GPL(rhashtable_destroy); -/************************************************************************** - * Self Test - **************************************************************************/ - -#ifdef CONFIG_TEST_RHASHTABLE - -#define TEST_HT_SIZE 8 -#define TEST_ENTRIES 2048 -#define TEST_PTR ((void *) 0xdeadbeef) -#define TEST_NEXPANDS 4 - -static int test_mutex_is_held(void) -{ - return 1; -} + if (params->min_size) + ht->p.min_size = roundup_pow_of_two(params->min_size); + + if (params->max_size) + ht->p.max_size = rounddown_pow_of_two(params->max_size); + + if (params->insecure_max_entries) + ht->p.insecure_max_entries = + rounddown_pow_of_two(params->insecure_max_entries); + else + ht->p.insecure_max_entries = ht->p.max_size * 2; + + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + + /* The maximum (not average) chain length grows with the + * size of the hash table, at a rate of (log N)/(log log N). + * The value of 16 is selected so that even if the hash + * table grew to 2^32 you would not expect the maximum + * chain length to exceed it unless we are under attack + * (or extremely unlucky). + * + * As this limit is only to detect attacks, we don't need + * to set it to a lower value as you'd need the chain + * length to vastly exceed 16 to have any real effect + * on the system. + */ + if (!params->insecure_elasticity) + ht->elasticity = 16; -struct test_obj { - void *ptr; - int value; - struct rhash_head node; -}; + if (params->locks_mul) + ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); + else + ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; -static int __init test_rht_lookup(struct rhashtable *ht) -{ - unsigned int i; + ht->key_len = ht->p.key_len; + if (!params->hashfn) { + ht->p.hashfn = jhash; - for (i = 0; i < TEST_ENTRIES * 2; i++) { - struct test_obj *obj; - bool expected = !(i % 2); - u32 key = i; - - obj = rhashtable_lookup(ht, &key); - - if (expected && !obj) { - pr_warn("Test failed: Could not find key %u\n", key); - return -ENOENT; - } else if (!expected && obj) { - pr_warn("Test failed: Unexpected entry found for key %u\n", - key); - return -EEXIST; - } else if (expected && obj) { - if (obj->ptr != TEST_PTR || obj->value != i) { - pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", - obj->ptr, TEST_PTR, obj->value, i); - return -EINVAL; - } + if (!(ht->key_len & (sizeof(u32) - 1))) { + ht->key_len /= sizeof(u32); + ht->p.hashfn = rhashtable_jhash2; } } - return 0; -} - -static void test_bucket_stats(struct rhashtable *ht, - struct bucket_table *tbl, - bool quiet) -{ - unsigned int cnt, i, total = 0; - struct test_obj *obj; + tbl = bucket_table_alloc(ht, size, GFP_KERNEL); + if (tbl == NULL) + return -ENOMEM; - for (i = 0; i < tbl->size; i++) { - cnt = 0; + atomic_set(&ht->nelems, 0); - if (!quiet) - pr_info(" [%#4x/%zu]", i, tbl->size); + RCU_INIT_POINTER(ht->tbl, tbl); - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { - cnt++; - total++; - if (!quiet) - pr_cont(" [%p],", obj); - } + INIT_WORK(&ht->run_work, rht_deferred_worker); - if (!quiet) - pr_cont("\n [%#x] first element: %p, chain length: %u\n", - i, tbl->buckets[i], cnt); - } - - pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", - total, ht->nelems, TEST_ENTRIES); + return 0; } +EXPORT_SYMBOL_GPL(rhashtable_init); -static int __init test_rhashtable(struct rhashtable *ht) +/** + * rhashtable_free_and_destroy - free elements and destroy hash table + * @ht: the hash table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * Stops an eventual async resize. If defined, invokes free_fn for each + * element to releasal resources. Please note that RCU protected + * readers may still be accessing the elements. Releasing of resources + * must occur in a compatible manner. Then frees the bucket array. + * + * This function will eventually sleep to wait for an async resize + * to complete. The caller is responsible that no further write operations + * occurs in parallel. + */ +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg) { - struct bucket_table *tbl; - struct test_obj *obj, *next; - int err; + const struct bucket_table *tbl; unsigned int i; - /* - * Insertion Test: - * Insert TEST_ENTRIES into table with all keys even numbers - */ - pr_info(" Adding %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { - struct test_obj *obj; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) { - err = -ENOMEM; - goto error; + cancel_work_sync(&ht->run_work); + + mutex_lock(&ht->mutex); + tbl = rht_dereference(ht->tbl, ht); + if (free_fn) { + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + + for (pos = rht_dereference(tbl->buckets[i], ht), + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + !rht_is_a_nulls(pos); + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL) + free_fn(rht_obj(ht, pos), arg); } - - obj->ptr = TEST_PTR; - obj->value = i * 2; - - rhashtable_insert(ht, &obj->node, GFP_KERNEL); - } - - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); - test_bucket_stats(ht, tbl, true); - test_rht_lookup(ht); - rcu_read_unlock(); - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table expansion iteration %u...\n", i); - rhashtable_expand(ht, GFP_KERNEL); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table shrinkage iteration %u...\n", i); - rhashtable_shrink(ht, GFP_KERNEL); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - pr_info(" Deleting %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { - u32 key = i * 2; - - obj = rhashtable_lookup(ht, &key); - BUG_ON(!obj); - - rhashtable_remove(ht, &obj->node, GFP_KERNEL); - kfree(obj); } - return 0; - -error: - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) - kfree(obj); - - return err; + bucket_table_free(tbl); + mutex_unlock(&ht->mutex); } +EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); -static int __init test_rht_init(void) +void rhashtable_destroy(struct rhashtable *ht) { - struct rhashtable ht; - struct rhashtable_params params = { - .nelem_hint = TEST_HT_SIZE, - .head_offset = offsetof(struct test_obj, node), - .key_offset = offsetof(struct test_obj, value), - .key_len = sizeof(int), - .hashfn = arch_fast_hash, - .mutex_is_held = &test_mutex_is_held, - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, - }; - int err; - - pr_info("Running resizable hashtable tests...\n"); - - err = rhashtable_init(&ht, ¶ms); - if (err < 0) { - pr_warn("Test failed: Unable to initialize hashtable: %d\n", - err); - return err; - } - - err = test_rhashtable(&ht); - - rhashtable_destroy(&ht); - - return err; + return rhashtable_free_and_destroy(ht, NULL, NULL); } - -subsys_initcall(test_rht_init); - -#endif /* CONFIG_TEST_RHASHTABLE */ +EXPORT_SYMBOL_GPL(rhashtable_destroy); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index c9f2e8c6ccc9..99fbc2f238c4 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -56,6 +56,38 @@ int sg_nents(struct scatterlist *sg) } EXPORT_SYMBOL(sg_nents); +/** + * sg_nents_for_len - return total count of entries in scatterlist + * needed to satisfy the supplied length + * @sg: The scatterlist + * @len: The total required length + * + * Description: + * Determines the number of entries in sg that are required to meet + * the supplied length, taking into acount chaining as well + * + * Returns: + * the number of sg entries needed, negative error on failure + * + **/ +int sg_nents_for_len(struct scatterlist *sg, u64 len) +{ + int nents; + u64 total; + + if (!len) + return 0; + + for (nents = 0, total = 0; sg; sg = sg_next(sg)) { + nents++; + total += sg->length; + if (total >= len) + return nents; + } + + return -EINVAL; +} +EXPORT_SYMBOL(sg_nents_for_len); /** * sg_last - return the last scatterlist entry in a list diff --git a/lib/seq_buf.c b/lib/seq_buf.c new file mode 100644 index 000000000000..5c94e1012a91 --- /dev/null +++ b/lib/seq_buf.c @@ -0,0 +1,323 @@ +/* + * seq_buf.c + * + * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * The seq_buf is a handy tool that allows you to pass a descriptor around + * to a buffer that other functions can write to. It is similar to the + * seq_file functionality but has some differences. + * + * To use it, the seq_buf must be initialized with seq_buf_init(). + * This will set up the counters within the descriptor. You can call + * seq_buf_init() more than once to reset the seq_buf to start + * from scratch. + */ +#include <linux/uaccess.h> +#include <linux/seq_file.h> +#include <linux/seq_buf.h> + +/** + * seq_buf_can_fit - can the new data fit in the current buffer? + * @s: the seq_buf descriptor + * @len: The length to see if it can fit in the current buffer + * + * Returns true if there's enough unused space in the seq_buf buffer + * to fit the amount of new data according to @len. + */ +static bool seq_buf_can_fit(struct seq_buf *s, size_t len) +{ + return s->len + len <= s->size; +} + +/** + * seq_buf_print_seq - move the contents of seq_buf into a seq_file + * @m: the seq_file descriptor that is the destination + * @s: the seq_buf descriptor that is the source. + * + * Returns zero on success, non zero otherwise + */ +int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) +{ + unsigned int len = seq_buf_used(s); + + return seq_write(m, s->buffer, len); +} + +/** + * seq_buf_vprintf - sequence printing of information. + * @s: seq_buf descriptor + * @fmt: printf format string + * @args: va_list of arguments from a printf() type function + * + * Writes a vnprintf() format into the sequencce buffer. + * + * Returns zero on success, -1 on overflow. + */ +int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) +{ + int len; + + WARN_ON(s->size == 0); + + if (s->len < s->size) { + len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); + if (s->len + len < s->size) { + s->len += len; + return 0; + } + } + seq_buf_set_overflow(s); + return -1; +} + +/** + * seq_buf_printf - sequence printing of information + * @s: seq_buf descriptor + * @fmt: printf format string + * + * Writes a printf() format into the sequence buffer. + * + * Returns zero on success, -1 on overflow. + */ +int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = seq_buf_vprintf(s, fmt, ap); + va_end(ap); + + return ret; +} + +#ifdef CONFIG_BINARY_PRINTF +/** + * seq_buf_bprintf - Write the printf string from binary arguments + * @s: seq_buf descriptor + * @fmt: The format string for the @binary arguments + * @binary: The binary arguments for @fmt. + * + * When recording in a fast path, a printf may be recorded with just + * saving the format and the arguments as they were passed to the + * function, instead of wasting cycles converting the arguments into + * ASCII characters. Instead, the arguments are saved in a 32 bit + * word array that is defined by the format string constraints. + * + * This function will take the format and the binary array and finish + * the conversion into the ASCII string within the buffer. + * + * Returns zero on success, -1 on overflow. + */ +int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) +{ + unsigned int len = seq_buf_buffer_left(s); + int ret; + + WARN_ON(s->size == 0); + + if (s->len < s->size) { + ret = bstr_printf(s->buffer + s->len, len, fmt, binary); + if (s->len + ret < s->size) { + s->len += ret; + return 0; + } + } + seq_buf_set_overflow(s); + return -1; +} +#endif /* CONFIG_BINARY_PRINTF */ + +/** + * seq_buf_puts - sequence printing of simple string + * @s: seq_buf descriptor + * @str: simple string to record + * + * Copy a simple string into the sequence buffer. + * + * Returns zero on success, -1 on overflow + */ +int seq_buf_puts(struct seq_buf *s, const char *str) +{ + unsigned int len = strlen(str); + + WARN_ON(s->size == 0); + + if (seq_buf_can_fit(s, len)) { + memcpy(s->buffer + s->len, str, len); + s->len += len; + return 0; + } + seq_buf_set_overflow(s); + return -1; +} + +/** + * seq_buf_putc - sequence printing of simple character + * @s: seq_buf descriptor + * @c: simple character to record + * + * Copy a single character into the sequence buffer. + * + * Returns zero on success, -1 on overflow + */ +int seq_buf_putc(struct seq_buf *s, unsigned char c) +{ + WARN_ON(s->size == 0); + + if (seq_buf_can_fit(s, 1)) { + s->buffer[s->len++] = c; + return 0; + } + seq_buf_set_overflow(s); + return -1; +} + +/** + * seq_buf_putmem - write raw data into the sequenc buffer + * @s: seq_buf descriptor + * @mem: The raw memory to copy into the buffer + * @len: The length of the raw memory to copy (in bytes) + * + * There may be cases where raw memory needs to be written into the + * buffer and a strcpy() would not work. Using this function allows + * for such cases. + * + * Returns zero on success, -1 on overflow + */ +int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) +{ + WARN_ON(s->size == 0); + + if (seq_buf_can_fit(s, len)) { + memcpy(s->buffer + s->len, mem, len); + s->len += len; + return 0; + } + seq_buf_set_overflow(s); + return -1; +} + +#define MAX_MEMHEX_BYTES 8U +#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) + +/** + * seq_buf_putmem_hex - write raw memory into the buffer in ASCII hex + * @s: seq_buf descriptor + * @mem: The raw memory to write its hex ASCII representation of + * @len: The length of the raw memory to copy (in bytes) + * + * This is similar to seq_buf_putmem() except instead of just copying the + * raw memory into the buffer it writes its ASCII representation of it + * in hex characters. + * + * Returns zero on success, -1 on overflow + */ +int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, + unsigned int len) +{ + unsigned char hex[HEX_CHARS]; + const unsigned char *data = mem; + unsigned int start_len; + int i, j; + + WARN_ON(s->size == 0); + + while (len) { + start_len = min(len, HEX_CHARS - 1); +#ifdef __BIG_ENDIAN + for (i = 0, j = 0; i < start_len; i++) { +#else + for (i = start_len-1, j = 0; i >= 0; i--) { +#endif + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); + } + if (WARN_ON_ONCE(j == 0 || j/2 > len)) + break; + + /* j increments twice per loop */ + len -= j / 2; + hex[j++] = ' '; + + seq_buf_putmem(s, hex, j); + if (seq_buf_has_overflowed(s)) + return -1; + } + return 0; +} + +/** + * seq_buf_path - copy a path into the sequence buffer + * @s: seq_buf descriptor + * @path: path to write into the sequence buffer. + * @esc: set of characters to escape in the output + * + * Write a path name into the sequence buffer. + * + * Returns the number of written bytes on success, -1 on overflow + */ +int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) +{ + char *buf; + size_t size = seq_buf_get_buf(s, &buf); + int res = -1; + + WARN_ON(s->size == 0); + + if (size) { + char *p = d_path(path, buf, size); + if (!IS_ERR(p)) { + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; + } + } + seq_buf_commit(s, res); + + return res; +} + +/** + * seq_buf_to_user - copy the squence buffer to user space + * @s: seq_buf descriptor + * @ubuf: The userspace memory location to copy to + * @cnt: The amount to copy + * + * Copies the sequence buffer into the userspace memory pointed to + * by @ubuf. It starts from the last read position (@s->readpos) + * and writes up to @cnt characters or till it reaches the end of + * the content in the buffer (@s->len), which ever comes first. + * + * On success, it returns a positive number of the number of bytes + * it copied. + * + * On failure it returns -EBUSY if all of the content in the + * sequence has been already read, which includes nothing in the + * sequence (@s->len == @s->readpos). + * + * Returns -EFAULT if the copy to userspace fails. + */ +int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, int cnt) +{ + int len; + int ret; + + if (!cnt) + return 0; + + if (s->len <= s->readpos) + return -EBUSY; + + len = seq_buf_used(s) - s->readpos; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); + if (ret == cnt) + return -EFAULT; + + cnt -= ret; + + s->readpos += cnt; + return cnt; +} diff --git a/lib/sha1.c b/lib/sha1.c index 1df191e04a24..5a56dfd7b99d 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -198,3 +198,4 @@ void sha_init(__u32 *buf) buf[3] = 0x10325476; buf[4] = 0xc3d2e1f0; } +EXPORT_SYMBOL(sha_init); diff --git a/lib/show_mem.c b/lib/show_mem.c index 09225796991a..adc98e1825ba 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -6,8 +6,8 @@ */ #include <linux/mm.h> -#include <linux/nmi.h> #include <linux/quicklist.h> +#include <linux/cma.h> void show_mem(unsigned int filter) { @@ -28,7 +28,7 @@ void show_mem(unsigned int filter) continue; total += zone->present_pages; - reserved = zone->present_pages - zone->managed_pages; + reserved += zone->present_pages - zone->managed_pages; if (is_highmem_idx(zoneid)) highmem += zone->present_pages; @@ -38,7 +38,12 @@ void show_mem(unsigned int filter) printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); +#ifdef CONFIG_CMA + printk("%lu pages reserved\n", (reserved - totalcma_pages)); + printk("%lu pages cma reserved\n", totalcma_pages); +#else printk("%lu pages reserved\n", reserved); +#endif #ifdef CONFIG_QUICKLIST printk("%lu pages in pagetable cache\n", quicklist_total_size()); diff --git a/lib/sort.c b/lib/sort.c index 926d00429ed2..43c9fe73ae2e 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -4,10 +4,9 @@ * Jan 23 2005 Matt Mackall <mpm@selenic.com> */ -#include <linux/kernel.h> -#include <linux/module.h> +#include <linux/types.h> +#include <linux/export.h> #include <linux/sort.h> -#include <linux/slab.h> static void u32_swap(void *a, void *b, int size) { @@ -85,6 +84,7 @@ void sort(void *base, size_t num, size_t size, EXPORT_SYMBOL(sort); #if 0 +#include <linux/slab.h> /* a simple boot-time regression test */ int cmpint(const void *a, const void *b) diff --git a/lib/stmp_device.c b/lib/stmp_device.c index 8ac9bcc4289a..a904656f4fd7 100644 --- a/lib/stmp_device.c +++ b/lib/stmp_device.c @@ -15,7 +15,8 @@ #include <linux/io.h> #include <linux/errno.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> #include <linux/stmp_device.h> #define STMP_MODULE_CLKGATE (1 << 30) diff --git a/lib/string.c b/lib/string.c index 10063300b830..bb3d4b6993c4 100644 --- a/lib/string.c +++ b/lib/string.c @@ -58,14 +58,6 @@ int strncasecmp(const char *s1, const char *s2, size_t len) } EXPORT_SYMBOL(strncasecmp); #endif -#ifndef __HAVE_ARCH_STRNICMP -#undef strnicmp -int strnicmp(const char *s1, const char *s2, size_t len) -{ - return strncasecmp(s1, s2, len); -} -EXPORT_SYMBOL(strnicmp); -#endif #ifndef __HAVE_ARCH_STRCASECMP int strcasecmp(const char *s1, const char *s2) @@ -321,12 +313,12 @@ EXPORT_SYMBOL(strchrnul); */ char *strrchr(const char *s, int c) { - const char *p = s + strlen(s); - do { - if (*p == (char)c) - return (char *)p; - } while (--p >= s); - return NULL; + const char *last = NULL; + do { + if (*s == (char)c) + last = s; + } while (*s++); + return (char *)last; } EXPORT_SYMBOL(strrchr); #endif @@ -604,13 +596,18 @@ EXPORT_SYMBOL(memset); * @s: Pointer to the start of the area. * @count: The size of the area. * + * Note: usually using memset() is just fine (!), but in cases + * where clearing out _local_ data at the end of a scope is + * necessary, memzero_explicit() should be used instead in + * order to prevent the compiler from optimising away zeroing. + * * memzero_explicit() doesn't need an arch-specific version as * it just invokes the one of memset() implicitly. */ void memzero_explicit(void *s, size_t count) { memset(s, 0, count); - OPTIMIZER_HIDE_VAR(s); + barrier_data(s); } EXPORT_SYMBOL(memzero_explicit); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 58b78ba57439..c98ae818eb4e 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -4,6 +4,7 @@ * Copyright 31 August 2008 James Bottomley * Copyright (C) 2013, Intel Corporation */ +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/math64.h> #include <linux/export.h> @@ -14,25 +15,25 @@ /** * string_get_size - get the size in the specified units - * @size: The size to be converted + * @size: The size to be converted in blocks + * @blk_size: Size of the block (use 1 for size in bytes) * @units: units to use (powers of 1000 or 1024) * @buf: buffer to format to * @len: length of buffer * * This function returns a string formatted to 3 significant figures - * giving the size in the required units. Returns 0 on success or - * error on failure. @buf is always zero terminated. + * giving the size in the required units. @buf should have room for + * at least 9 bytes and will always be zero terminated. * */ -int string_get_size(u64 size, const enum string_size_units units, - char *buf, int len) +void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, + char *buf, int len) { static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL + "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" }; static const char *const units_2[] = { - "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", - NULL + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" }; static const char *const *const units_str[] = { [STRING_UNITS_10] = units_10, @@ -43,34 +44,57 @@ int string_get_size(u64 size, const enum string_size_units units, [STRING_UNITS_2] = 1024, }; int i, j; - u64 remainder = 0, sf_cap; + u32 remainder = 0, sf_cap, exp; char tmp[8]; + const char *unit; tmp[0] = '\0'; i = 0; - if (size >= divisor[units]) { - while (size >= divisor[units] && units_str[units][i]) { - remainder = do_div(size, divisor[units]); - i++; - } + if (!size) + goto out; - sf_cap = size; - for (j = 0; sf_cap*10 < 1000; j++) - sf_cap *= 10; + while (blk_size >= divisor[units]) { + remainder = do_div(blk_size, divisor[units]); + i++; + } - if (j) { - remainder *= 1000; - do_div(remainder, divisor[units]); - snprintf(tmp, sizeof(tmp), ".%03lld", - (unsigned long long)remainder); - tmp[j+1] = '\0'; - } + exp = divisor[units] / (u32)blk_size; + if (size >= exp) { + remainder = do_div(size, divisor[units]); + remainder *= blk_size; + i++; + } else { + remainder *= size; + } + + size *= blk_size; + size += remainder / divisor[units]; + remainder %= divisor[units]; + + while (size >= divisor[units]) { + remainder = do_div(size, divisor[units]); + i++; } - snprintf(buf, len, "%lld%s %s", (unsigned long long)size, - tmp, units_str[units][i]); + sf_cap = size; + for (j = 0; sf_cap*10 < 1000; j++) + sf_cap *= 10; - return 0; + if (j) { + remainder *= 1000; + remainder /= divisor[units]; + snprintf(tmp, sizeof(tmp), ".%03u", remainder); + tmp[j+1] = '\0'; + } + + out: + if (i >= ARRAY_SIZE(units_2)) + unit = "UNK"; + else + unit = units_str[units][i]; + + snprintf(buf, len, "%u%s %s", (u32)size, + tmp, unit); } EXPORT_SYMBOL(string_get_size); @@ -243,29 +267,21 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags) } EXPORT_SYMBOL(string_unescape); -static int escape_passthrough(unsigned char c, char **dst, size_t *osz) +static bool escape_passthrough(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 1) - return -ENOMEM; - - *out++ = c; - - *dst = out; - *osz -= 1; - - return 1; + if (out < end) + *out = c; + *dst = out + 1; + return true; } -static int escape_space(unsigned char c, char **dst, size_t *osz) +static bool escape_space(unsigned char c, char **dst, char *end) { char *out = *dst; unsigned char to; - if (*osz < 2) - return -ENOMEM; - switch (c) { case '\n': to = 'n'; @@ -283,26 +299,25 @@ static int escape_space(unsigned char c, char **dst, size_t *osz) to = 'f'; break; default: - return 0; + return false; } - *out++ = '\\'; - *out++ = to; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = to; + ++out; *dst = out; - *osz -= 2; - - return 1; + return true; } -static int escape_special(unsigned char c, char **dst, size_t *osz) +static bool escape_special(unsigned char c, char **dst, char *end) { char *out = *dst; unsigned char to; - if (*osz < 2) - return -ENOMEM; - switch (c) { case '\\': to = '\\'; @@ -314,71 +329,78 @@ static int escape_special(unsigned char c, char **dst, size_t *osz) to = 'e'; break; default: - return 0; + return false; } - *out++ = '\\'; - *out++ = to; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = to; + ++out; *dst = out; - *osz -= 2; - - return 1; + return true; } -static int escape_null(unsigned char c, char **dst, size_t *osz) +static bool escape_null(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 2) - return -ENOMEM; - if (c) - return 0; + return false; - *out++ = '\\'; - *out++ = '0'; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = '0'; + ++out; *dst = out; - *osz -= 2; - - return 1; + return true; } -static int escape_octal(unsigned char c, char **dst, size_t *osz) +static bool escape_octal(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 4) - return -ENOMEM; - - *out++ = '\\'; - *out++ = ((c >> 6) & 0x07) + '0'; - *out++ = ((c >> 3) & 0x07) + '0'; - *out++ = ((c >> 0) & 0x07) + '0'; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = ((c >> 6) & 0x07) + '0'; + ++out; + if (out < end) + *out = ((c >> 3) & 0x07) + '0'; + ++out; + if (out < end) + *out = ((c >> 0) & 0x07) + '0'; + ++out; *dst = out; - *osz -= 4; - - return 1; + return true; } -static int escape_hex(unsigned char c, char **dst, size_t *osz) +static bool escape_hex(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 4) - return -ENOMEM; - - *out++ = '\\'; - *out++ = 'x'; - *out++ = hex_asc_hi(c); - *out++ = hex_asc_lo(c); + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = 'x'; + ++out; + if (out < end) + *out = hex_asc_hi(c); + ++out; + if (out < end) + *out = hex_asc_lo(c); + ++out; *dst = out; - *osz -= 4; - - return 1; + return true; } /** @@ -430,19 +452,17 @@ static int escape_hex(unsigned char c, char **dst, size_t *osz) * it if needs. * * Return: - * The amount of the characters processed to the destination buffer, or - * %-ENOMEM if the size of buffer is not enough to put an escaped character is - * returned. - * - * Even in the case of error @dst pointer will be updated to point to the byte - * after the last processed character. + * The total size of the escaped output that would be generated for + * the given input and flags. To check whether the output was + * truncated, compare the return value to osz. There is room left in + * dst for a '\0' terminator if and only if ret < osz. */ -int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, +int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *esc) { - char *out = *dst, *p = out; + char *p = dst; + char *end = p + osz; bool is_dict = esc && *esc; - int ret = 0; while (isz--) { unsigned char c = *src++; @@ -462,55 +482,26 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, (is_dict && !strchr(esc, c))) { /* do nothing */ } else { - if (flags & ESCAPE_SPACE) { - ret = escape_space(c, &p, &osz); - if (ret < 0) - break; - if (ret > 0) - continue; - } - - if (flags & ESCAPE_SPECIAL) { - ret = escape_special(c, &p, &osz); - if (ret < 0) - break; - if (ret > 0) - continue; - } - - if (flags & ESCAPE_NULL) { - ret = escape_null(c, &p, &osz); - if (ret < 0) - break; - if (ret > 0) - continue; - } + if (flags & ESCAPE_SPACE && escape_space(c, &p, end)) + continue; + + if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end)) + continue; + + if (flags & ESCAPE_NULL && escape_null(c, &p, end)) + continue; /* ESCAPE_OCTAL and ESCAPE_HEX always go last */ - if (flags & ESCAPE_OCTAL) { - ret = escape_octal(c, &p, &osz); - if (ret < 0) - break; + if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end)) continue; - } - if (flags & ESCAPE_HEX) { - ret = escape_hex(c, &p, &osz); - if (ret < 0) - break; + + if (flags & ESCAPE_HEX && escape_hex(c, &p, end)) continue; - } } - ret = escape_passthrough(c, &p, &osz); - if (ret < 0) - break; + escape_passthrough(c, &p, end); } - *dst = p; - - if (ret < 0) - return ret; - - return p - out; + return p - dst; } EXPORT_SYMBOL(string_escape_mem); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index bb2b201d6ad0..e0af6ff73d14 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,4 +1,5 @@ -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index a28df5206d95..3a5f2b366d84 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -57,7 +57,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, return res + find_zero(data) + 1 - align; } res += sizeof(unsigned long); - if (unlikely(max < sizeof(unsigned long))) + /* We already handled 'unsigned long' bytes. Did we do it all ? */ + if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) @@ -84,13 +85,21 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, * @str: The string to measure. * @count: Maximum count (including NUL character) * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * * Returns the size of the string INCLUDING the terminating NUL. - * If the string is too long, returns 'count+1'. + * If the string is too long, returns a number larger than @count. User + * has to check the return value against "> count". * On exception (or invalid count), returns 0. + * + * NOTE! You should basically never use this function. There is + * almost never any valid case for using the length of a user space + * string, since the string can be changed at any time by other + * threads. Use "strncpy_from_user()" instead to get a stable copy + * of the string. */ long strnlen_user(const char __user *str, long count) { @@ -113,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user); * strlen_user: - Get the size of a user string INCLUDING final NUL. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 4abda074ea45..42e192decbfd 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -537,8 +537,9 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); * Allocates bounce buffer and returns its kernel virtual address. */ -phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir) +static phys_addr_t +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) { dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); @@ -655,7 +656,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); if (paddr == SWIOTLB_MAP_ERROR) - return NULL; + goto err_warn; ret = phys_to_virt(paddr); dev_addr = phys_to_dma(hwdev, paddr); @@ -669,7 +670,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); - return NULL; + goto err_warn; } } @@ -677,6 +678,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, memset(ret, 0, size); return ret; + +err_warn: + pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + dev_name(hwdev), size); + dump_stack(); + + return NULL; } EXPORT_SYMBOL(swiotlb_alloc_coherent); diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c new file mode 100644 index 000000000000..c227cc43ec0a --- /dev/null +++ b/lib/test-hexdump.c @@ -0,0 +1,180 @@ +/* + * Test cases for lib/hexdump.c module. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/string.h> + +static const unsigned char data_b[] = { + '\xbe', '\x32', '\xdb', '\x7b', '\x0a', '\x18', '\x93', '\xb2', /* 00 - 07 */ + '\x70', '\xba', '\xc4', '\x24', '\x7d', '\x83', '\x34', '\x9b', /* 08 - 0f */ + '\xa6', '\x9c', '\x31', '\xad', '\x9c', '\x0f', '\xac', '\xe9', /* 10 - 17 */ + '\x4c', '\xd1', '\x19', '\x99', '\x43', '\xb1', '\xaf', '\x0c', /* 18 - 1f */ +}; + +static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; + +static const char * const test_data_1_le[] __initconst = { + "be", "32", "db", "7b", "0a", "18", "93", "b2", + "70", "ba", "c4", "24", "7d", "83", "34", "9b", + "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", + "4c", "d1", "19", "99", "43", "b1", "af", "0c", +}; + +static const char *test_data_2_le[] __initdata = { + "32be", "7bdb", "180a", "b293", + "ba70", "24c4", "837d", "9b34", + "9ca6", "ad31", "0f9c", "e9ac", + "d14c", "9919", "b143", "0caf", +}; + +static const char *test_data_4_le[] __initdata = { + "7bdb32be", "b293180a", "24c4ba70", "9b34837d", + "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", +}; + +static const char *test_data_8_le[] __initdata = { + "b293180a7bdb32be", "9b34837d24c4ba70", + "e9ac0f9cad319ca6", "0cafb1439919d14c", +}; + +static void __init test_hexdump(size_t len, int rowsize, int groupsize, + bool ascii) +{ + char test[32 * 3 + 2 + 32 + 1]; + char real[32 * 3 + 2 + 32 + 1]; + char *p; + const char * const *result; + size_t l = len; + int gs = groupsize, rs = rowsize; + unsigned int i; + + hex_dump_to_buffer(data_b, l, rs, gs, real, sizeof(real), ascii); + + if (rs != 16 && rs != 32) + rs = 16; + + if (l > rs) + l = rs; + + if (!is_power_of_2(gs) || gs > 8 || (len % gs != 0)) + gs = 1; + + if (gs == 8) + result = test_data_8_le; + else if (gs == 4) + result = test_data_4_le; + else if (gs == 2) + result = test_data_2_le; + else + result = test_data_1_le; + + memset(test, ' ', sizeof(test)); + + /* hex dump */ + p = test; + for (i = 0; i < l / gs; i++) { + const char *q = *result++; + size_t amount = strlen(q); + + strncpy(p, q, amount); + p += amount + 1; + } + if (i) + p--; + + /* ASCII part */ + if (ascii) { + p = test + rs * 2 + rs / gs + 1; + strncpy(p, data_a, l); + p += l; + } + + *p = '\0'; + + if (strcmp(test, real)) { + pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); + pr_err("Result: '%s'\n", real); + pr_err("Expect: '%s'\n", test); + } +} + +static void __init test_hexdump_set(int rowsize, bool ascii) +{ + size_t d = min_t(size_t, sizeof(data_b), rowsize); + size_t len = get_random_int() % d + 1; + + test_hexdump(len, rowsize, 4, ascii); + test_hexdump(len, rowsize, 2, ascii); + test_hexdump(len, rowsize, 8, ascii); + test_hexdump(len, rowsize, 1, ascii); +} + +static void __init test_hexdump_overflow(bool ascii) +{ + char buf[56]; + const char *t = test_data_1_le[0]; + size_t l = get_random_int() % sizeof(buf); + bool a; + int e, r; + + memset(buf, ' ', sizeof(buf)); + + r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii); + + if (ascii) + e = 50; + else + e = 2; + buf[e + 2] = '\0'; + + if (!l) { + a = r == e && buf[0] == ' '; + } else if (l < 3) { + a = r == e && buf[0] == '\0'; + } else if (l < 4) { + a = r == e && !strcmp(buf, t); + } else if (ascii) { + if (l < 51) + a = r == e && buf[l - 1] == '\0' && buf[l - 2] == ' '; + else + a = r == e && buf[50] == '\0' && buf[49] == '.'; + } else { + a = r == e && buf[e] == '\0'; + } + + if (!a) { + pr_err("Len: %zu rc: %u strlen: %zu\n", l, r, strlen(buf)); + pr_err("Result: '%s'\n", buf); + } +} + +static int __init test_hexdump_init(void) +{ + unsigned int i; + int rowsize; + + pr_info("Running tests...\n"); + + rowsize = (get_random_int() % 2 + 1) * 16; + for (i = 0; i < 16; i++) + test_hexdump_set(rowsize, false); + + rowsize = (get_random_int() % 2 + 1) * 16; + for (i = 0; i < 16; i++) + test_hexdump_set(rowsize, true); + + for (i = 0; i < 16; i++) + test_hexdump_overflow(false); + + for (i = 0; i < 16; i++) + test_hexdump_overflow(true); + + return -EINVAL; +} +module_init(test_hexdump_init); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index ab0d30e1e18f..8e376efd88a4 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -260,16 +260,28 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2, return NULL; } +static __init void +test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc, + int q_test, const char *name) +{ + int q_real; + + q_real = string_escape_mem(in, p, NULL, 0, flags, esc); + if (q_real != q_test) + pr_warn("Test '%s' failed: flags = %u, osz = 0, expected %d, got %d\n", + name, flags, q_test, q_real); +} + static __init void test_string_escape(const char *name, const struct test_string_2 *s2, unsigned int flags, const char *esc) { - int q_real = 512; - char *out_test = kmalloc(q_real, GFP_KERNEL); - char *out_real = kmalloc(q_real, GFP_KERNEL); + size_t out_size = 512; + char *out_test = kmalloc(out_size, GFP_KERNEL); + char *out_real = kmalloc(out_size, GFP_KERNEL); char *in = kmalloc(256, GFP_KERNEL); - char *buf = out_real; int p = 0, q_test = 0; + int q_real; if (!out_test || !out_real || !in) goto out; @@ -301,29 +313,19 @@ static __init void test_string_escape(const char *name, q_test += len; } - q_real = string_escape_mem(in, p, &buf, q_real, flags, esc); + q_real = string_escape_mem(in, p, out_real, out_size, flags, esc); test_string_check_buf(name, flags, in, p, out_real, q_real, out_test, q_test); + + test_string_escape_overflow(in, p, flags, esc, q_test, name); + out: kfree(in); kfree(out_real); kfree(out_test); } -static __init void test_string_escape_nomem(void) -{ - char *in = "\eb \\C\007\"\x90\r]"; - char out[64], *buf = out; - int rc = -ENOMEM, ret; - - ret = string_escape_str_any_np(in, &buf, strlen(in), NULL); - if (ret == rc) - return; - - pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc); -} - static int __init test_string_helpers_init(void) { unsigned int i; @@ -342,8 +344,6 @@ static int __init test_string_helpers_init(void) for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); - test_string_escape_nomem(); - return -EINVAL; } module_init(test_string_helpers_init); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 23e070bcf72d..80d78c51f65f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -124,7 +124,7 @@ static struct bpf_test tests[] = { { { 0, 0xfffffffd } } }, { - "DIV_KX", + "DIV_MOD_KX", .u.insns = { BPF_STMT(BPF_LD | BPF_IMM, 8), BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2), @@ -134,12 +134,18 @@ static struct bpf_test tests[] = { BPF_STMT(BPF_MISC | BPF_TAX, 0), BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MOD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MOD | BPF_K, 0x70000000), BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), BPF_STMT(BPF_RET | BPF_A, 0) }, CLASSIC | FLAG_NO_DATA, { }, - { { 0, 0x40000001 } } + { { 0, 0x20000000 } } }, { "AND_OR_LSH_K", @@ -1756,6 +1762,49 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } } }, + { + "nmap reduced", + .u.insns_int = { + BPF_MOV64_REG(R6, R1), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26), + BPF_MOV32_IMM(R0, 18), + BPF_STX_MEM(BPF_W, R10, R0, -64), + BPF_LDX_MEM(BPF_W, R7, R10, -64), + BPF_LD_IND(BPF_W, R7, 14), + BPF_STX_MEM(BPF_W, R10, R0, -60), + BPF_MOV32_IMM(R0, 280971478), + BPF_STX_MEM(BPF_W, R10, R0, -56), + BPF_LDX_MEM(BPF_W, R7, R10, -56), + BPF_LDX_MEM(BPF_W, R0, R10, -60), + BPF_ALU32_REG(BPF_SUB, R0, R7), + BPF_JMP_IMM(BPF_JNE, R0, 0, 15), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13), + BPF_MOV32_IMM(R0, 22), + BPF_STX_MEM(BPF_W, R10, R0, -56), + BPF_LDX_MEM(BPF_W, R7, R10, -56), + BPF_LD_IND(BPF_H, R7, 14), + BPF_STX_MEM(BPF_W, R10, R0, -52), + BPF_MOV32_IMM(R0, 17366), + BPF_STX_MEM(BPF_W, R10, R0, -48), + BPF_LDX_MEM(BPF_W, R7, R10, -48), + BPF_LDX_MEM(BPF_W, R0, R10, -52), + BPF_ALU32_REG(BPF_SUB, R0, R7), + BPF_JMP_IMM(BPF_JNE, R0, 0, 2), + BPF_MOV32_IMM(R0, 256), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, + { { 38, 256 } } + }, }; static struct net_device dev; diff --git a/lib/test_kasan.c b/lib/test_kasan.c new file mode 100644 index 000000000000..098c08eddfab --- /dev/null +++ b/lib/test_kasan.c @@ -0,0 +1,277 @@ +/* + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <a.ryabinin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/module.h> + +static noinline void __init kmalloc_oob_right(void) +{ + char *ptr; + size_t size = 123; + + pr_info("out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 'x'; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_left(void) +{ + char *ptr; + size_t size = 15; + + pr_info("out-of-bounds to left\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + *ptr = *(ptr - 1); + kfree(ptr); +} + +static noinline void __init kmalloc_node_oob_right(void) +{ + char *ptr; + size_t size = 4096; + + pr_info("kmalloc_node(): out-of-bounds to right\n"); + ptr = kmalloc_node(size, GFP_KERNEL, 0); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_large_oob_rigth(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc large allocation: out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_krealloc_more(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 19; + + pr_info("out-of-bounds after krealloc more\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + + ptr2[size2] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_krealloc_less(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 15; + + pr_info("out-of-bounds after krealloc less\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + ptr2[size1] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_16(void) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + pr_info("kmalloc out-of-bounds for 16-bytes access\n"); + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + kfree(ptr2); + return; + } + *ptr1 = *ptr2; + kfree(ptr1); + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_in_memset(void) +{ + char *ptr; + size_t size = 666; + + pr_info("out-of-bounds in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr, 0, size+5); + kfree(ptr); +} + +static noinline void __init kmalloc_uaf(void) +{ + char *ptr; + size_t size = 10; + + pr_info("use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + *(ptr + 8) = 'x'; +} + +static noinline void __init kmalloc_uaf_memset(void) +{ + char *ptr; + size_t size = 33; + + pr_info("use-after-free in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + memset(ptr, 0, size); +} + +static noinline void __init kmalloc_uaf2(void) +{ + char *ptr1, *ptr2; + size_t size = 43; + + pr_info("use-after-free after another kmalloc\n"); + ptr1 = kmalloc(size, GFP_KERNEL); + if (!ptr1) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr1); + ptr2 = kmalloc(size, GFP_KERNEL); + if (!ptr2) { + pr_err("Allocation failed\n"); + return; + } + + ptr1[40] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmem_cache_oob(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache = kmem_cache_create("test_cache", + size, 0, + 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("out-of-bounds in kmem_cache_alloc\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + *p = p[size]; + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static char global_array[10]; + +static noinline void __init kasan_global_oob(void) +{ + volatile int i = 3; + char *p = &global_array[ARRAY_SIZE(global_array) + i]; + + pr_info("out-of-bounds global variable\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_stack_oob(void) +{ + char stack_array[10]; + volatile int i = 0; + char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; + + pr_info("out-of-bounds on stack\n"); + *(volatile char *)p; +} + +static int __init kmalloc_tests_init(void) +{ + kmalloc_oob_right(); + kmalloc_oob_left(); + kmalloc_node_oob_right(); + kmalloc_large_oob_rigth(); + kmalloc_oob_krealloc_more(); + kmalloc_oob_krealloc_less(); + kmalloc_oob_16(); + kmalloc_oob_in_memset(); + kmalloc_uaf(); + kmalloc_uaf_memset(); + kmalloc_uaf2(); + kmem_cache_oob(); + kasan_stack_oob(); + kasan_global_oob(); + return -EAGAIN; +} + +module_init(kmalloc_tests_init); +MODULE_LICENSE("GPL"); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c new file mode 100644 index 000000000000..b2957540d3c7 --- /dev/null +++ b/lib/test_rhashtable.c @@ -0,0 +1,212 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * + * Based on the following paper: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/************************************************************************** + * Self Test + **************************************************************************/ + +#include <linux/init.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rcupdate.h> +#include <linux/rhashtable.h> +#include <linux/slab.h> + + +#define TEST_HT_SIZE 8 +#define TEST_ENTRIES 2048 +#define TEST_PTR ((void *) 0xdeadbeef) +#define TEST_NEXPANDS 4 + +struct test_obj { + void *ptr; + int value; + struct rhash_head node; +}; + +static const struct rhashtable_params test_rht_params = { + .nelem_hint = TEST_HT_SIZE, + .head_offset = offsetof(struct test_obj, node), + .key_offset = offsetof(struct test_obj, value), + .key_len = sizeof(int), + .hashfn = jhash, + .nulls_base = (3U << RHT_BASE_SHIFT), +}; + +static int __init test_rht_lookup(struct rhashtable *ht) +{ + unsigned int i; + + for (i = 0; i < TEST_ENTRIES * 2; i++) { + struct test_obj *obj; + bool expected = !(i % 2); + u32 key = i; + + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); + + if (expected && !obj) { + pr_warn("Test failed: Could not find key %u\n", key); + return -ENOENT; + } else if (!expected && obj) { + pr_warn("Test failed: Unexpected entry found for key %u\n", + key); + return -EEXIST; + } else if (expected && obj) { + if (obj->ptr != TEST_PTR || obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", + obj->ptr, TEST_PTR, obj->value, i); + return -EINVAL; + } + } + } + + return 0; +} + +static void test_bucket_stats(struct rhashtable *ht, bool quiet) +{ + unsigned int cnt, rcu_cnt, i, total = 0; + struct rhash_head *pos; + struct test_obj *obj; + struct bucket_table *tbl; + + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) { + rcu_cnt = cnt = 0; + + if (!quiet) + pr_info(" [%#4x/%u]", i, tbl->size); + + rht_for_each_entry_rcu(obj, pos, tbl, i, node) { + cnt++; + total++; + if (!quiet) + pr_cont(" [%p],", obj); + } + + rht_for_each_entry_rcu(obj, pos, tbl, i, node) + rcu_cnt++; + + if (rcu_cnt != cnt) + pr_warn("Test failed: Chain count mismach %d != %d", + cnt, rcu_cnt); + + if (!quiet) + pr_cont("\n [%#x] first element: %p, chain length: %u\n", + i, tbl->buckets[i], cnt); + } + + pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", + total, atomic_read(&ht->nelems), TEST_ENTRIES); + + if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) + pr_warn("Test failed: Total count mismatch ^^^"); +} + +static int __init test_rhashtable(struct rhashtable *ht) +{ + struct bucket_table *tbl; + struct test_obj *obj; + struct rhash_head *pos, *next; + int err; + unsigned int i; + + /* + * Insertion Test: + * Insert TEST_ENTRIES into table with all keys even numbers + */ + pr_info(" Adding %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + struct test_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + err = -ENOMEM; + goto error; + } + + obj->ptr = TEST_PTR; + obj->value = i * 2; + + err = rhashtable_insert_fast(ht, &obj->node, test_rht_params); + if (err) { + kfree(obj); + goto error; + } + } + + rcu_read_lock(); + test_bucket_stats(ht, true); + test_rht_lookup(ht); + rcu_read_unlock(); + + rcu_read_lock(); + test_bucket_stats(ht, true); + rcu_read_unlock(); + + pr_info(" Deleting %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + u32 key = i * 2; + + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); + BUG_ON(!obj); + + rhashtable_remove_fast(ht, &obj->node, test_rht_params); + kfree(obj); + } + + return 0; + +error: + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) + rht_for_each_entry_safe(obj, pos, next, tbl, i, node) + kfree(obj); + + return err; +} + +static struct rhashtable ht; + +static int __init test_rht_init(void) +{ + int err; + + pr_info("Running resizable hashtable tests...\n"); + + err = rhashtable_init(&ht, &test_rht_params); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + return err; + } + + err = test_rhashtable(&ht); + + rhashtable_destroy(&ht); + + return err; +} + +static void __exit test_rht_exit(void) +{ +} + +module_init(test_rht_init); +module_exit(test_rht_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/lib/timerqueue.c b/lib/timerqueue.c index a382e4a32609..782ae8ca2c06 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -36,7 +36,7 @@ * Adds the timer node to the timerqueue, sorted by the * node's expires value. */ -void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) +bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { struct rb_node **p = &head->head.rb_node; struct rb_node *parent = NULL; @@ -56,8 +56,11 @@ void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &head->head); - if (!head->next || node->expires.tv64 < head->next->expires.tv64) + if (!head->next || node->expires.tv64 < head->next->expires.tv64) { head->next = node; + return true; + } + return false; } EXPORT_SYMBOL_GPL(timerqueue_add); @@ -69,7 +72,7 @@ EXPORT_SYMBOL_GPL(timerqueue_add); * * Removes the timer node from the timerqueue. */ -void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) +bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) { WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); @@ -82,6 +85,7 @@ void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) } rb_erase(&node->node, &head->head); RB_CLEAR_NODE(&node->node); + return head->next != NULL; } EXPORT_SYMBOL_GPL(timerqueue_del); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ec337f64f52d..da39c608a28c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -17,6 +17,7 @@ */ #include <stdarg.h> +#include <linux/clk-provider.h> #include <linux/module.h> /* for KSYM_SYMBOL_LEN */ #include <linux/types.h> #include <linux/string.h> @@ -32,6 +33,7 @@ #include <asm/page.h> /* for PAGE_SIZE */ #include <asm/sections.h> /* for dereference_function_descriptor() */ +#include <asm/byteorder.h> /* cpu_to_le16 */ #include <linux/string_helpers.h> #include "kstrtox.h" @@ -114,148 +116,152 @@ int skip_atoi(const char **s) { int i = 0; - while (isdigit(**s)) + do { i = i*10 + *((*s)++) - '0'; + } while (isdigit(**s)); return i; } -/* Decimal conversion is by far the most typical, and is used - * for /proc and /sys data. This directly impacts e.g. top performance - * with many processes running. We optimize it for speed - * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> - * (with permission from the author, Douglas W. Jones). +/* + * Decimal conversion is by far the most typical, and is used for + * /proc and /sys data. This directly impacts e.g. top performance + * with many processes running. We optimize it for speed by emitting + * two characters at a time, using a 200 byte lookup table. This + * roughly halves the number of multiplications compared to computing + * the digits one at a time. Implementation strongly inspired by the + * previous version, which in turn used ideas described at + * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission + * from the author, Douglas W. Jones). + * + * It turns out there is precisely one 26 bit fixed-point + * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32 + * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual + * range happens to be somewhat larger (x <= 1073741898), but that's + * irrelevant for our purpose. + * + * For dividing a number in the range [10^4, 10^6-1] by 100, we still + * need a 32x32->64 bit multiply, so we simply use the same constant. + * + * For dividing a number in the range [100, 10^4-1] by 100, there are + * several options. The simplest is (x * 0x147b) >> 19, which is valid + * for all x <= 43698. */ -#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 -/* Formats correctly any integer in [0, 999999999] */ +static const u16 decpair[100] = { +#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030) + _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9), + _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19), + _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29), + _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39), + _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49), + _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59), + _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69), + _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79), + _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89), + _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99), +#undef _ +}; + +/* + * This will print a single '0' even if r == 0, since we would + * immediately jump to out_r where two 0s would be written but only + * one of them accounted for in buf. This is needed by ip4_string + * below. All other callers pass a non-zero value of r. +*/ static noinline_for_stack -char *put_dec_full9(char *buf, unsigned q) +char *put_dec_trunc8(char *buf, unsigned r) { - unsigned r; + unsigned q; - /* - * Possible ways to approx. divide by 10 - * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) - * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) - * (x * 0x6667) >> 18 x < 43699 - * (x * 0x3334) >> 17 x < 16389 - * (x * 0x199a) >> 16 x < 16389 - * (x * 0x0ccd) >> 15 x < 16389 - * (x * 0x0667) >> 14 x < 2739 - * (x * 0x0334) >> 13 x < 1029 - * (x * 0x019a) >> 12 x < 1029 - * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) - * (x * 0x0067) >> 10 x < 179 - * (x * 0x0034) >> 9 x < 69 same - * (x * 0x001a) >> 8 x < 69 same - * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) - * (x * 0x0007) >> 6 x < 19 - * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> - */ - r = (q * (uint64_t)0x1999999a) >> 32; - *buf++ = (q - 10 * r) + '0'; /* 1 */ - q = (r * (uint64_t)0x1999999a) >> 32; - *buf++ = (r - 10 * q) + '0'; /* 2 */ - r = (q * (uint64_t)0x1999999a) >> 32; - *buf++ = (q - 10 * r) + '0'; /* 3 */ - q = (r * (uint64_t)0x1999999a) >> 32; - *buf++ = (r - 10 * q) + '0'; /* 4 */ - r = (q * (uint64_t)0x1999999a) >> 32; - *buf++ = (q - 10 * r) + '0'; /* 5 */ - /* Now value is under 10000, can avoid 64-bit multiply */ - q = (r * 0x199a) >> 16; - *buf++ = (r - 10 * q) + '0'; /* 6 */ - r = (q * 0xcd) >> 11; - *buf++ = (q - 10 * r) + '0'; /* 7 */ - q = (r * 0xcd) >> 11; - *buf++ = (r - 10 * q) + '0'; /* 8 */ - *buf++ = q + '0'; /* 9 */ + /* 1 <= r < 10^8 */ + if (r < 100) + goto out_r; + + /* 100 <= r < 10^8 */ + q = (r * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; + + /* 1 <= q < 10^6 */ + if (q < 100) + goto out_q; + + /* 100 <= q < 10^6 */ + r = (q * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[q - 100*r]; + buf += 2; + + /* 1 <= r < 10^4 */ + if (r < 100) + goto out_r; + + /* 100 <= r < 10^4 */ + q = (r * 0x147b) >> 19; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; +out_q: + /* 1 <= q < 100 */ + r = q; +out_r: + /* 1 <= r < 100 */ + *((u16 *)buf) = decpair[r]; + buf += r < 10 ? 1 : 2; return buf; } -#endif -/* Similar to above but do not pad with zeros. - * Code can be easily arranged to print 9 digits too, but our callers - * always call put_dec_full9() instead when the number has 9 decimal digits. - */ +#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64 static noinline_for_stack -char *put_dec_trunc8(char *buf, unsigned r) +char *put_dec_full8(char *buf, unsigned r) { unsigned q; - /* Copy of previous function's body with added early returns */ - while (r >= 10000) { - q = r + '0'; - r = (r * (uint64_t)0x1999999a) >> 32; - *buf++ = q - 10*r; - } + /* 0 <= r < 10^8 */ + q = (r * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; - q = (r * 0x199a) >> 16; /* r <= 9999 */ - *buf++ = (r - 10 * q) + '0'; - if (q == 0) - return buf; - r = (q * 0xcd) >> 11; /* q <= 999 */ - *buf++ = (q - 10 * r) + '0'; - if (r == 0) - return buf; - q = (r * 0xcd) >> 11; /* r <= 99 */ - *buf++ = (r - 10 * q) + '0'; - if (q == 0) - return buf; - *buf++ = q + '0'; /* q <= 9 */ - return buf; -} + /* 0 <= q < 10^6 */ + r = (q * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[q - 100*r]; + buf += 2; -/* There are two algorithms to print larger numbers. - * One is generic: divide by 1000000000 and repeatedly print - * groups of (up to) 9 digits. It's conceptually simple, - * but requires a (unsigned long long) / 1000000000 division. - * - * Second algorithm splits 64-bit unsigned long long into 16-bit chunks, - * manipulates them cleverly and generates groups of 4 decimal digits. - * It so happens that it does NOT require long long division. - * - * If long is > 32 bits, division of 64-bit values is relatively easy, - * and we will use the first algorithm. - * If long long is > 64 bits (strange architecture with VERY large long long), - * second algorithm can't be used, and we again use the first one. - * - * Else (if long is 32 bits and long long is 64 bits) we use second one. - */ - -#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 + /* 0 <= r < 10^4 */ + q = (r * 0x147b) >> 19; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; -/* First algorithm: generic */ + /* 0 <= q < 100 */ + *((u16 *)buf) = decpair[q]; + buf += 2; + return buf; +} -static +static noinline_for_stack char *put_dec(char *buf, unsigned long long n) { - if (n >= 100*1000*1000) { - while (n >= 1000*1000*1000) - buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); - if (n >= 100*1000*1000) - return put_dec_full9(buf, n); - } + if (n >= 100*1000*1000) + buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); + /* 1 <= n <= 1.6e11 */ + if (n >= 100*1000*1000) + buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); + /* 1 <= n < 1e8 */ return put_dec_trunc8(buf, n); } -#else - -/* Second algorithm: valid only for 64-bit long longs */ +#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64 -/* See comment in put_dec_full9 for choice of constants */ -static noinline_for_stack -void put_dec_full4(char *buf, unsigned q) +static void +put_dec_full4(char *buf, unsigned r) { - unsigned r; - r = (q * 0xccd) >> 15; - buf[0] = (q - 10 * r) + '0'; - q = (r * 0xcd) >> 11; - buf[1] = (r - 10 * q) + '0'; - r = (q * 0xcd) >> 11; - buf[2] = (q - 10 * r) + '0'; - buf[3] = r + '0'; + unsigned q; + + /* 0 <= r < 10^4 */ + q = (r * 0x147b) >> 19; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; + /* 0 <= q < 100 */ + *((u16 *)buf) = decpair[q]; } /* @@ -263,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q) * The approximation x/10000 == (x * 0x346DC5D7) >> 43 * holds for all x < 1,128,869,999. The largest value this * helper will ever be asked to convert is 1,125,520,955. - * (d1 in the put_dec code, assuming n is all-ones). + * (second call in the put_dec code, assuming n is all-ones). */ -static +static noinline_for_stack unsigned put_dec_helper4(char *buf, unsigned x) { uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; @@ -292,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n) d2 = (h ) & 0xffff; d3 = (h >> 16); /* implicit "& 0xffff" */ + /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0 + = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */ q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); q = put_dec_helper4(buf, q); @@ -321,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n) */ int num_to_str(char *buf, int size, unsigned long long num) { - char tmp[sizeof(num) * 3]; + /* put_dec requires 2-byte alignment of the buffer. */ + char tmp[sizeof(num) * 3] __aligned(2); int idx, len; /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ @@ -339,11 +348,11 @@ int num_to_str(char *buf, int size, unsigned long long num) return len; } -#define ZEROPAD 1 /* pad with zero */ -#define SIGN 2 /* unsigned/signed long */ +#define SIGN 1 /* unsigned/signed, must be 1 */ +#define LEFT 2 /* left justified */ #define PLUS 4 /* show plus */ #define SPACE 8 /* space if plus */ -#define LEFT 16 /* left justified */ +#define ZEROPAD 16 /* pad with zero, must be 16 == '0' - ' ' */ #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ @@ -382,10 +391,8 @@ static noinline_for_stack char *number(char *buf, char *end, unsigned long long num, struct printf_spec spec) { - /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ - static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ - - char tmp[66]; + /* put_dec requires 2-byte alignment of the buffer. */ + char tmp[3 * sizeof(num)] __aligned(2); char sign; char locase; int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); @@ -421,12 +428,7 @@ char *number(char *buf, char *end, unsigned long long num, /* generate full string in tmp[], in reverse order */ i = 0; if (num < spec.base) - tmp[i++] = digits[num] | locase; - /* Generic code, for any base: - else do { - tmp[i++] = (digits[do_div(num,base)] | locase); - } while (num != 0); - */ + tmp[i++] = hex_asc_upper[num] | locase; else if (spec.base != 10) { /* 8 or 16 */ int mask = spec.base - 1; int shift = 3; @@ -434,7 +436,7 @@ char *number(char *buf, char *end, unsigned long long num, if (spec.base == 16) shift = 4; do { - tmp[i++] = (digits[((unsigned char)num) & mask] | locase); + tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase); num >>= shift; } while (num); } else { /* base 10 */ @@ -446,7 +448,7 @@ char *number(char *buf, char *end, unsigned long long num, spec.precision = i; /* leading space padding */ spec.field_width -= spec.precision; - if (!(spec.flags & (ZEROPAD+LEFT))) { + if (!(spec.flags & (ZEROPAD | LEFT))) { while (--spec.field_width >= 0) { if (buf < end) *buf = ' '; @@ -474,7 +476,8 @@ char *number(char *buf, char *end, unsigned long long num, } /* zero or space padding */ if (!(spec.flags & LEFT)) { - char c = (spec.flags & ZEROPAD) ? '0' : ' '; + char c = ' ' + (spec.flags & ZEROPAD); + BUILD_BUG_ON(' ' + ZEROPAD != '0'); while (--spec.field_width >= 0) { if (buf < end) *buf = c; @@ -782,13 +785,102 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, if (spec.field_width > 0) len = min_t(int, spec.field_width, 64); - for (i = 0; i < len && buf < end - 1; i++) { - buf = hex_byte_pack(buf, addr[i]); + for (i = 0; i < len; ++i) { + if (buf < end) + *buf = hex_asc_hi(addr[i]); + ++buf; + if (buf < end) + *buf = hex_asc_lo(addr[i]); + ++buf; + + if (separator && i != len - 1) { + if (buf < end) + *buf = separator; + ++buf; + } + } + + return buf; +} + +static noinline_for_stack +char *bitmap_string(char *buf, char *end, unsigned long *bitmap, + struct printf_spec spec, const char *fmt) +{ + const int CHUNKSZ = 32; + int nr_bits = max_t(int, spec.field_width, 0); + int i, chunksz; + bool first = true; + + /* reused to print numbers */ + spec = (struct printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; + + chunksz = nr_bits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nr_bits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + u32 chunkmask, val; + int word, bit; + + chunkmask = ((1ULL << chunksz) - 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (bitmap[word] >> bit) & chunkmask; + + if (!first) { + if (buf < end) + *buf = ','; + buf++; + } + first = false; - if (buf < end && separator && i != len - 1) - *buf++ = separator; + spec.field_width = DIV_ROUND_UP(chunksz, 4); + buf = number(buf, end, val, spec); + + chunksz = CHUNKSZ; } + return buf; +} +static noinline_for_stack +char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, + struct printf_spec spec, const char *fmt) +{ + int nr_bits = max_t(int, spec.field_width, 0); + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + int cur, rbot, rtop; + bool first = true; + + /* reused to print numbers */ + spec = (struct printf_spec){ .base = 10 }; + + rbot = cur = find_first_bit(bitmap, nr_bits); + while (cur < nr_bits) { + rtop = cur; + cur = find_next_bit(bitmap, nr_bits, cur + 1); + if (cur < nr_bits && cur <= rtop + 1) + continue; + + if (!first) { + if (buf < end) + *buf = ','; + buf++; + } + first = false; + + buf = number(buf, end, rbot, spec); + if (rbot < rtop) { + if (buf < end) + *buf = '-'; + buf++; + + buf = number(buf, end, rtop, spec); + } + + rbot = cur; + } return buf; } @@ -860,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt) break; } for (i = 0; i < 4; i++) { - char temp[3]; /* hold each IP quad in reverse order */ + char temp[4] __aligned(2); /* hold each IP quad in reverse order */ int digits = put_dec_trunc8(temp, addr[index]) - temp; if (leading_zeros) { if (digits < 3) @@ -1151,8 +1243,12 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, len = spec.field_width < 0 ? 1 : spec.field_width; - /* Ignore the error. We print as many characters as we can */ - string_escape_mem(addr, len, &buf, end - buf, flags, NULL); + /* + * string_escape_mem() writes as many characters as it can to + * the given buffer, and returns the total size of the output + * had the buffer been big enough. + */ + buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL); return buf; } @@ -1240,6 +1336,30 @@ char *address_val(char *buf, char *end, const void *addr, return number(buf, end, num, spec); } +static noinline_for_stack +char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, + const char *fmt) +{ + if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk) + return string(buf, end, NULL, spec); + + switch (fmt[1]) { + case 'r': + return number(buf, end, clk_get_rate(clk), spec); + + case 'n': + default: +#ifdef CONFIG_COMMON_CLK + return string(buf, end, __clk_get_name(clk), spec); +#else + spec.base = 16; + spec.field_width = sizeof(unsigned long) * 2 + 2; + spec.flags |= SPECIAL | SMALL | ZEROPAD; + return number(buf, end, (unsigned long)clk, spec); +#endif + } +} + int kptr_restrict __read_mostly; /* @@ -1257,6 +1377,10 @@ int kptr_restrict __read_mostly; * - 'B' For backtraced symbolic direct pointers with offset * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'b[l]' For a bitmap, the number of bits is determined by the field + * width which must be explicitly specified either as part of the + * format string '%32b[l]' or through '%*b[l]', [l] selects + * range-list format instead of hex format * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons @@ -1318,6 +1442,11 @@ int kptr_restrict __read_mostly; * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file + * - 'C' For a clock, it prints the name (Common Clock Framework) or address + * (legacy clock framework) of the clock + * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address + * (legacy clock framework) of the clock + * - 'Cr' For a clock, it prints the current rate of the clock * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1353,6 +1482,13 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return resource_string(buf, end, ptr, spec, fmt); case 'h': return hex_string(buf, end, ptr, spec, fmt); + case 'b': + switch (fmt[1]) { + case 'l': + return bitmap_list_string(buf, end, ptr, spec, fmt); + default: + return bitmap_string(buf, end, ptr, spec, fmt); + } case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ /* [mM]F (FDDI) */ @@ -1455,6 +1591,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); + case 'C': + return clock(buf, end, ptr, spec, fmt); case 'D': return dentry_name(buf, end, ((const struct file *)ptr)->f_path.dentry, @@ -1604,8 +1742,7 @@ qualifier: case 'p': spec->type = FORMAT_TYPE_PTR; - return fmt - start; - /* skip alnum */ + return ++fmt - start; case '%': spec->type = FORMAT_TYPE_PERCENT_CHAR; @@ -1646,29 +1783,21 @@ qualifier: if (spec->qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; else if (spec->qualifier == 'l') { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_LONG; - else - spec->type = FORMAT_TYPE_ULONG; + BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG); + spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN); } else if (_tolower(spec->qualifier) == 'z') { spec->type = FORMAT_TYPE_SIZE_T; } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; } else if (spec->qualifier == 'H') { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_BYTE; - else - spec->type = FORMAT_TYPE_UBYTE; + BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE); + spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN); } else if (spec->qualifier == 'h') { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_SHORT; - else - spec->type = FORMAT_TYPE_USHORT; + BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT); + spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN); } else { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_INT; - else - spec->type = FORMAT_TYPE_UINT; + BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT); + spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN); } return ++fmt - start; @@ -1689,6 +1818,8 @@ qualifier: * %pB output the name of a backtrace symbol with its offset * %pR output the address range in a struct resource with decoded flags * %pr output the address range in a struct resource with raw flags + * %pb output the bitmap with field width as the number of bits + * %pbl output the bitmap as range list with field width as the number of bits * %pM output a 6-byte MAC address with colons * %pMR output a 6-byte MAC address with colons in reversed order * %pMF output a 6-byte MAC address with dashes @@ -1706,6 +1837,11 @@ qualifier: * %*pE[achnops] print an escaped buffer * %*ph[CDN] a variable-length hex string with a separator (supports up to 64 * bytes of the input) + * %pC output the name (Common Clock Framework) or address (legacy clock + * framework) of a clock + * %pCn output the name (Common Clock Framework) or address (legacy clock + * framework) of a clock + * %pCr output the current rate of a clock * %n is ignored * * ** Please update Documentation/printk-formats.txt when making changes ** @@ -1728,7 +1864,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. */ - if (WARN_ON_ONCE((int) size < 0)) + if (WARN_ON_ONCE(size > INT_MAX)) return 0; str = buf; @@ -1794,7 +1930,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) break; case FORMAT_TYPE_PTR: - str = pointer(fmt+1, str, end, va_arg(args, void *), + str = pointer(fmt, str, end, va_arg(args, void *), spec); while (isalnum(*fmt)) fmt++; @@ -2232,7 +2368,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } case FORMAT_TYPE_PTR: - str = pointer(fmt+1, str, end, get_arg(void *), spec); + str = pointer(fmt, str, end, get_arg(void *), spec); while (isalnum(*fmt)) fmt++; break; |