From bc22c17e12c130dc929218a95aa347e0f3fd05dc Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:16 +0100 Subject: bzip2/lzma: library support for gzip, bzip2 and lzma decompression Impact: Replaces inflate.c with a wrapper around zlib_inflate; new library code This is the first part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma's decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - changed inflate.c to accomodate rest of patch - implementation of bzip2 compression (not used at this stage yet) - implementation of lzma compression (not used at this stage yet) - Makefile routines to support bzip2 and lzma kernel compression Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- lib/decompress_bunzip2.c | 735 ++++++++++++++++++++++++++++++++++++++++++++ lib/decompress_inflate.c | 167 ++++++++++ lib/decompress_unlzma.c | 647 ++++++++++++++++++++++++++++++++++++++ lib/zlib_inflate/inflate.h | 4 + lib/zlib_inflate/inftrees.h | 4 + 5 files changed, 1557 insertions(+) create mode 100644 lib/decompress_bunzip2.c create mode 100644 lib/decompress_inflate.c create mode 100644 lib/decompress_unlzma.c (limited to 'lib') diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c new file mode 100644 index 000000000000..5d3ddb5fcfd9 --- /dev/null +++ b/lib/decompress_bunzip2.c @@ -0,0 +1,735 @@ +/* vi: set sw = 4 ts = 4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + This code is licensed under the LGPLv2: + LGPL (http://www.gnu.org/copyleft/lgpl.html +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_unzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization in the name of the woman I loved, who + passed away Feb. 12, 2003. + + In memory of Toni W. Hagan + + Hospice of Acadiana, Inc. + 2600 Johnston St., Suite 200 + Lafayette, LA 70503-3240 + + Phone (337) 232-1234 or 1-800-738-2226 + Fax (337) 232-1297 + + http://www.hospiceacadiana.com/ + + Manuel + */ + +/* + Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) +*/ + + +#ifndef STATIC +#include +#endif /* !STATIC */ + +#include + +#ifndef INT_MAX +#define INT_MAX 0x7fffffff +#endif + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define BZIP2_IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinal value. */ + int limit[MAX_HUFCODE_BITS+1]; + int base[MAX_HUFCODE_BITS]; + int permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + memory that persists between calls to bunzip */ +struct bunzip_data { + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; + /* I/O tracking data (file handles, buffers, positions, etc.) */ + int (*fill)(void*, unsigned int); + int inbufCount, inbufPos /*, outbufPos*/; + unsigned char *inbuf /*,*outbuf*/; + unsigned int inbufBitCount, inbufBits; + /* The CRC values stored in the block header and calculated from the + data */ + unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; + /* Intermediate buffer and its size (in bytes) */ + unsigned int *dbuf, dbufSize; + /* These things are a bit too big to go on the stack */ + unsigned char selectors[32768]; /* nSelectors = 15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ + int io_error; /* non-zero if we have IO error */ +}; + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ +static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) +{ + unsigned int bits = 0; + + /* If we need to get more data from the byte buffer, do so. + (Loop getting one byte at a time to enforce endianness and avoid + unaligned access.) */ + while (bd->inbufBitCount < bits_wanted) { + /* If we need to read more data from file into byte buffer, do + so */ + if (bd->inbufPos == bd->inbufCount) { + if (bd->io_error) + return 0; + bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); + if (bd->inbufCount <= 0) { + bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; + return 0; + } + bd->inbufPos = 0; + } + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bd->inbufBitCount >= 24) { + bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); + bits_wanted -= bd->inbufBitCount; + bits <<= bits_wanted; + bd->inbufBitCount = 0; + } + /* Grab next 8 bits of input from buffer. */ + bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + } + /* Calculate result */ + bd->inbufBitCount -= bits_wanted; + bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ + +static int INIT get_next_block(struct bunzip_data *bd) +{ + struct group_data *hufGroup = NULL; + int *base = NULL; + int *limit = NULL; + int dbufCount, nextSym, dbufSize, groupCount, selector, + i, j, k, t, runPos, symCount, symTotal, nSelectors, + byteCount[256]; + unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; + unsigned int *dbuf, origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) + return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) + return RETVAL_NOT_BZIP_DATA; + /* We can add support for blockRandomised if anybody complains. + There was some code for this in busybox 1.0.0-pre3, but nobody ever + noticed that it didn't actually work. */ + if (get_bits(bd, 1)) + return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if (origPtr > dbufSize) + return RETVAL_DATA_ERROR; + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the + symbols back to the corresponding bytes. */ + t = get_bits(bd, 16); + symTotal = 0; + for (i = 0; i < 16; i++) { + if (t&(1 << (15-i))) { + k = get_bits(bd, 16); + for (j = 0; j < 16; j++) + if (k&(1 << (15-j))) + symToByte[symTotal++] = (16*i)+j; + } + } + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + /* nSelectors: Every GROUP_SIZE many symbols we select a new + Huffman coding group. Read in the group selector list, + which is stored as MTF encoded bit runs. (MTF = Move To + Front, as each value is used it's moved to the start of the + list.) */ + nSelectors = get_bits(bd, 15); + if (!nSelectors) + return RETVAL_DATA_ERROR; + for (i = 0; i < groupCount; i++) + mtfSymbol[i] = i; + for (i = 0; i < nSelectors; i++) { + /* Get next value */ + for (j = 0; get_bits(bd, 1); j++) + if (j >= groupCount) + return RETVAL_DATA_ERROR; + /* Decode MTF to get the next selector */ + uc = mtfSymbol[j]; + for (; j; j--) + mtfSymbol[j] = mtfSymbol[j-1]; + mtfSymbol[0] = selectors[i] = uc; + } + /* Read the Huffman coding tables for each group, which code + for symTotal literal symbols, plus two run symbols (RUNA, + RUNB) */ + symCount = symTotal+2; + for (j = 0; j < groupCount; j++) { + unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp; + /* Read Huffman code lengths for each symbol. They're + stored in a way similar to mtf; record a starting + value for the first symbol, and an offset from the + previous value for everys symbol after that. + (Subtracting 1 before the loop and then adding it + back at the end is an optimization that makes the + test inside the loop simpler: symbol length 0 + becomes negative, so an unsigned inequality catches + it.) */ + t = get_bits(bd, 5)-1; + for (i = 0; i < symCount; i++) { + for (;;) { + if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. Else + second bit indicates whether to + increment or decrement the value. + Optimization: grab 2 bits and unget + the second if the first was 0. */ + + k = get_bits(bd, 2); + if (k < 2) { + bd->inbufBitCount++; + break; + } + /* Add one if second bit 1, else + * subtract 1. Avoids if/else */ + t += (((k+1)&2)-1); + } + /* Correct for the initial -1, to get the + * final symbol length */ + length[i] = t+1; + } + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) + maxLen = length[i]; + else if (length[i] < minLen) + minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from + * length[]. + * + * permute[] is the lookup table for converting + * Huffman coded symbols into decoded symbols. base[] + * is the amount to subtract from the value of a + * Huffman symbol of a given length when using + * permute[]. + * + * limit[] indicates the largest numerical value a + * symbol with a given number of bits can have. This + * is how the Huffman codes can vary in length: each + * code with a value > limit[length] needs another + * bit. + */ + hufGroup = bd->groups+j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + /* Note that minLen can't be smaller than 1, so we + adjust the base and limit array pointers so we're + not always wasting the first entry. We do this + again when using them (during symbol decoding).*/ + base = hufGroup->base-1; + limit = hufGroup->limit-1; + /* Calculate permute[]. Concurently, initialize + * temp[] and limit[]. */ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + temp[i] = limit[i] = 0; + for (t = 0; t < symCount; t++) + if (length[t] == i) + hufGroup->permute[pp++] = t; + } + /* Count symbols coded for at each bit length */ + for (i = 0; i < symCount; i++) + temp[length[i]]++; + /* Calculate limit[] (the largest symbol-coding value + *at each bit length, which is (previous limit << + *1)+symbols at this level), and base[] (number of + *symbols to ignore at each bit length, which is limit + *minus the cumulative count of symbols coded for + *already). */ + pp = t = 0; + for (i = minLen; i < maxLen; i++) { + pp += temp[i]; + /* We read the largest possible symbol size + and then unget bits after determining how + many we need, and those extra bits could be + set to anything. (They're noise from + future symbols.) At each level we're + really only interested in the first few + bits, so here we set all the trailing + to-be-ignored bits to 1 so they don't + affect the value > limit[length] + comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + base[i+1] = pp-(t += temp[i]); + } + limit[maxLen+1] = INT_MAX; /* Sentinal value for + * reading next sym. */ + limit[maxLen] = pp+temp[maxLen]-1; + base[minLen] = 0; + } + /* We've finished reading and digesting the block header. Now + read this block's Huffman coded symbols from the file and + undo the Huffman coding and run length encoding, saving the + result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To + * Front table */ + for (i = 0; i < 256; i++) { + byteCount[i] = 0; + mtfSymbol[i] = (unsigned char)i; + } + /* Loop through compressed symbols. */ + runPos = dbufCount = symCount = selector = 0; + for (;;) { + /* Determine which Huffman coding group to use. */ + if (!(symCount--)) { + symCount = GROUP_SIZE-1; + if (selector >= nSelectors) + return RETVAL_DATA_ERROR; + hufGroup = bd->groups+selectors[selector++]; + base = hufGroup->base-1; + limit = hufGroup->limit-1; + } + /* Read next Huffman-coded symbol. */ + /* Note: It is far cheaper to read maxLen bits and + back up than it is to read minLen bits and then an + additional bit at a time, testing as we go. + Because there is a trailing last block (with file + CRC), there is no danger of the overread causing an + unexpected EOF for a valid compressed file. As a + further optimization, we do the read inline + (falling back to a call to get_bits if the buffer + runs dry). The following (up to got_huff_bits:) is + equivalent to j = get_bits(bd, hufGroup->maxLen); + */ + while (bd->inbufBitCount < hufGroup->maxLen) { + if (bd->inbufPos == bd->inbufCount) { + j = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = + (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount -= hufGroup->maxLen; + j = (bd->inbufBits >> bd->inbufBitCount)& + ((1 << hufGroup->maxLen)-1); +got_huff_bits: + /* Figure how how many bits are in next symbol and + * unget extras */ + i = hufGroup->minLen; + while (j > limit[i]) + ++i; + bd->inbufBitCount += (hufGroup->maxLen - i); + /* Huffman decode value to get nextSym (with bounds checking) */ + if ((i > hufGroup->maxLen) + || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) + >= MAX_SYMBOLS)) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[j]; + /* We have now decoded the symbol, which indicates + either a new literal byte, or a repeated run of the + most recent literal byte. First, check if nextSym + indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. */ + if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ + /* If this is the start of a new run, zero out + * counter */ + if (!runPos) { + runPos = 1; + t = 0; + } + /* Neat trick that saves 1 symbol: instead of + or-ing 0 or 1 at each bit position, add 1 + or 2 instead. For example, 1011 is 1 << 0 + + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 + + 1 << 2. You can make any bit pattern + that way using 1 less symbol than the basic + or 0/1 method (except all bits 0, which + would use no symbols, but a run of length 0 + doesn't mean anything in this context). + Thus space is saved. */ + t += (runPos << nextSym); + /* +runPos if RUNA; +2*runPos if RUNB */ + + runPos <<= 1; + continue; + } + /* When we hit the first non-run symbol after a run, + we now know how many times to repeat the last + literal, so append that many copies to our buffer + of decoded symbols (dbuf) now. (The last literal + used is the one at the head of the mtfSymbol + array.) */ + if (runPos) { + runPos = 0; + if (dbufCount+t >= dbufSize) + return RETVAL_DATA_ERROR; + + uc = symToByte[mtfSymbol[0]]; + byteCount[uc] += t; + while (t--) + dbuf[dbufCount++] = uc; + } + /* Is this the terminating symbol? */ + if (nextSym > symTotal) + break; + /* At this point, nextSym indicates a new literal + character. Subtract one to get the position in the + MTF array at which this literal is currently to be + found. (Note that the result can't be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another + instance of the first symbol in the mtf array, + position 0, would have been handled as part of a + run above. Therefore 1 unused mtf position minus 2 + non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) + return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + /* Adjust the MTF array. Since we typically expect to + *move only a small number of symbols, and are bound + *by 256 in any case, using memmove here would + *typically be bigger and slower due to function call + *overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (unsigned int)uc; + } + /* At this point, we've read all the Huffman-coded symbols + (and repeated runs) for this block from the input stream, + and decoded them into the intermediate buffer. There are + dbufCount many decoded bytes in dbuf[]. Now undo the + Burrows-Wheeler transform on dbuf. See + http://dogma.net/markn/articles/bwt/bwt.htm + */ + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + k = j+byteCount[i]; + byteCount[i] = j; + j = k; + } + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uc = (unsigned char)(dbuf[i] & 0xff); + dbuf[byteCount[uc]] |= (i << 8); + byteCount[uc]++; + } + /* Decode first byte by hand to initialize "previous" byte. + Note that it doesn't get output, and if the first three + characters are identical it doesn't qualify as a run (hence + writeRunCountdown = 5). */ + if (dbufCount) { + if (origPtr >= dbufCount) + return RETVAL_DATA_ERROR; + bd->writePos = dbuf[origPtr]; + bd->writeCurrent = (unsigned char)(bd->writePos&0xff); + bd->writePos >>= 8; + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo burrows-wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd =-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. +*/ + +static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) +{ + const unsigned int *dbuf; + int pos, xcurrent, previous, gotcount; + + /* If last read was short due to end of file, return last block now */ + if (bd->writeCount < 0) + return bd->writeCount; + + gotcount = 0; + dbuf = bd->dbuf; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven't + Huffman-decoded a block into the intermediate buffer yet). */ + + if (bd->writeCopies) { + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + /* Loop outputting bytes */ + for (;;) { + /* If the output buffer is full, snapshot + * state and return */ + if (gotcount >= len) { + bd->writePos = pos; + bd->writeCurrent = xcurrent; + bd->writeCopies++; + return len; + } + /* Write next byte into output buffer, updating CRC */ + outbuf[gotcount++] = xcurrent; + bd->writeCRC = (((bd->writeCRC) << 8) + ^bd->crc32Table[((bd->writeCRC) >> 24) + ^xcurrent]); + /* Loop now if we're outputting multiple + * copies of this byte */ + if (bd->writeCopies) { + --bd->writeCopies; + continue; + } +decode_next_byte: + if (!bd->writeCount--) + break; + /* Follow sequence vector to undo + * Burrows-Wheeler transform */ + previous = xcurrent; + pos = dbuf[pos]; + xcurrent = pos&0xff; + pos >>= 8; + /* After 3 consecutive copies of the same + byte, the 4th is a repeat count. We count + down from 4 instead *of counting up because + testing for non-zero is faster */ + if (--bd->writeRunCountdown) { + if (xcurrent != previous) + bd->writeRunCountdown = 4; + } else { + /* We have a repeated run, this byte + * indicates the count */ + bd->writeCopies = xcurrent; + xcurrent = previous; + bd->writeRunCountdown = 5; + /* Sometimes there are just 3 bytes + * (run length 0) */ + if (!bd->writeCopies) + goto decode_next_byte; + /* Subtract the 1 copy we'd output + * anyway to get extras */ + --bd->writeCopies; + } + } + /* Decompression of this block completed successfully */ + bd->writeCRC = ~bd->writeCRC; + bd->totalCRC = ((bd->totalCRC << 1) | + (bd->totalCRC >> 31)) ^ bd->writeCRC; + /* If this block had a CRC error, force file level CRC error. */ + if (bd->writeCRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC+1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next + * block of input */ + /* (previous is just a convenient unused temp variable here) */ + previous = get_next_block(bd); + if (previous) { + bd->writeCount = previous; + return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; + } + bd->writeCRC = 0xffffffffUL; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + goto decode_next_byte; +} + +static int INIT nofill(void *buf, unsigned int len) +{ + return -1; +} + +/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. */ +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, + int (*fill)(void*, unsigned int)) +{ + struct bunzip_data *bd; + unsigned int i, j, c; + const unsigned int BZh0 = + (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) + +(((unsigned int)'h') << 8)+(unsigned int)'0'; + + /* Figure out how much data to allocate */ + i = sizeof(struct bunzip_data); + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = malloc(i); + memset(bd, 0, sizeof(struct bunzip_data)); + /* Setup input buffer */ + bd->inbuf = inbuf; + bd->inbufCount = len; + if (fill != NULL) + bd->fill = fill; + else + bd->fill = nofill; + + /* Init the CRC32 table (big endian) */ + for (i = 0; i < 256; i++) { + c = i << 24; + for (j = 8; j; j--) + c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); + bd->crc32Table[i] = c; + } + + /* Ensure that file starts with "BZh['1'-'9']." */ + i = get_bits(bd, 32); + if (((unsigned int)(i-BZh0-1)) >= 9) + return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + bd->dbufSize = 100000*(i-BZh0); + + bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); + return RETVAL_OK; +} + +/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, + not end of file.) */ +STATIC int INIT bunzip2(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *outbuf, + int *pos, + void(*error_fn)(char *x)) +{ + struct bunzip_data *bd; + int i = -1; + unsigned char *inbuf; + + set_error_fn(error_fn); + if (flush) + outbuf = malloc(BZIP2_IOBUF_SIZE); + else + len -= 4; /* Uncompressed size hack active in pre-boot + environment */ + if (!outbuf) { + error("Could not allocate output bufer"); + return -1; + } + if (buf) + inbuf = buf; + else + inbuf = malloc(BZIP2_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + i = start_bunzip(&bd, inbuf, len, fill); + if (!i) { + for (;;) { + i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); + if (i <= 0) + break; + if (!flush) + outbuf += i; + else + if (i != flush(outbuf, i)) { + i = RETVAL_UNEXPECTED_OUTPUT_EOF; + break; + } + } + } + /* Check CRC and release memory */ + if (i == RETVAL_LAST_BLOCK) { + if (bd->headerCRC != bd->totalCRC) + error("Data integrity error when decompressing."); + else + i = RETVAL_OK; + } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { + error("Compressed file ends unexpectedly"); + } + if (bd->dbuf) + large_free(bd->dbuf); + if (pos) + *pos = bd->inbufPos; + free(bd); + if (!buf) + free(inbuf); +exit_0: + if (flush) + free(outbuf); + return i; +} + +#define decompress bunzip2 diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c new file mode 100644 index 000000000000..163e66aea5f6 --- /dev/null +++ b/lib/decompress_inflate.c @@ -0,0 +1,167 @@ +#ifdef STATIC +/* Pre-boot environment: included */ + +/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots + * errors about console_printk etc... on ARM */ +#define _LINUX_KERNEL_H + +#include "zlib_inflate/inftrees.c" +#include "zlib_inflate/inffast.c" +#include "zlib_inflate/inflate.c" + +#else /* STATIC */ +/* initramfs et al: linked */ + +#include + +#include "zlib_inflate/inftrees.h" +#include "zlib_inflate/inffast.h" +#include "zlib_inflate/inflate.h" + +#include "zlib_inflate/infutil.h" + +#endif /* STATIC */ + +#include + +#define INBUF_LEN (16*1024) + +/* Included from initramfs et al code */ +STATIC int INIT gunzip(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *out_buf, + int *pos, + void(*error_fn)(char *x)) { + u8 *zbuf; + struct z_stream_s *strm; + int rc; + size_t out_len; + + set_error_fn(error_fn); + rc = -1; + if (flush) { + out_len = 0x8100; /* 32 K */ + out_buf = malloc(out_len); + } else { + out_len = 0x7fffffff; /* no limit */ + } + if (!out_buf) { + error("Out of memory while allocating output buffer"); + goto gunzip_nomem1; + } + + if (buf) + zbuf = buf; + else { + zbuf = malloc(INBUF_LEN); + len = 0; + } + if (!zbuf) { + error("Out of memory while allocating input buffer"); + goto gunzip_nomem2; + } + + strm = malloc(sizeof(*strm)); + if (strm == NULL) { + error("Out of memory while allocating z_stream"); + goto gunzip_nomem3; + } + + strm->workspace = malloc(flush ? zlib_inflate_workspacesize() : + sizeof(struct inflate_state)); + if (strm->workspace == NULL) { + error("Out of memory while allocating workspace"); + goto gunzip_nomem4; + } + + if (len == 0) + len = fill(zbuf, INBUF_LEN); + + /* verify the gzip header */ + if (len < 10 || + zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) { + if (pos) + *pos = 0; + error("Not a gzip file"); + goto gunzip_5; + } + + /* skip over gzip header (1f,8b,08... 10 bytes total + + * possible asciz filename) + */ + strm->next_in = zbuf + 10; + /* skip over asciz filename */ + if (zbuf[3] & 0x8) { + while (strm->next_in[0]) + strm->next_in++; + strm->next_in++; + } + strm->avail_in = len - 10; + + strm->next_out = out_buf; + strm->avail_out = out_len; + + rc = zlib_inflateInit2(strm, -MAX_WBITS); + + if (!flush) { + WS(strm)->inflate_state.wsize = 0; + WS(strm)->inflate_state.window = NULL; + } + + while (rc == Z_OK) { + if (strm->avail_in == 0) { + /* TODO: handle case where both pos and fill are set */ + len = fill(zbuf, INBUF_LEN); + if (len < 0) { + rc = -1; + error("read error"); + break; + } + strm->next_in = zbuf; + strm->avail_in = len; + } + rc = zlib_inflate(strm, 0); + + /* Write any data generated */ + if (flush && strm->next_out > out_buf) { + int l = strm->next_out - out_buf; + if (l != flush(out_buf, l)) { + rc = -1; + error("write error"); + break; + } + strm->next_out = out_buf; + strm->avail_out = out_len; + } + + /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ + if (rc == Z_STREAM_END) { + rc = 0; + break; + } else if (rc != Z_OK) { + error("uncompression error"); + rc = -1; + } + } + + zlib_inflateEnd(strm); + if (pos) + /* add + 8 to skip over trailer */ + *pos = strm->next_in - zbuf+8; + +gunzip_5: + free(strm->workspace); +gunzip_nomem4: + free(strm); +gunzip_nomem3: + if (!buf) + free(zbuf); +gunzip_nomem2: + if (flush) + free(out_buf); +gunzip_nomem1: + return rc; /* returns Z_OK (0) if successful */ +} + +#define decompress gunzip diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c new file mode 100644 index 000000000000..546f2f4c157e --- /dev/null +++ b/lib/decompress_unlzma.c @@ -0,0 +1,647 @@ +/* Lzma decompressor for Linux kernel. Shamelessly snarfed + *from busybox 1.1.1 + * + *Linux kernel adaptation + *Copyright (C) 2006 Alain < alain@knaff.lu > + * + *Based on small lzma deflate implementation/Small range coder + *implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Copyright (C) 1999-2005 Igor Pavlov + * + *Copyrights of the parts, see headers below. + * + * + *This program is free software; you can redistribute it and/or + *modify it under the terms of the GNU Lesser General Public + *License as published by the Free Software Foundation; either + *version 2.1 of the License, or (at your option) any later version. + * + *This program is distributed in the hope that it will be useful, + *but WITHOUT ANY WARRANTY; without even the implied warranty of + *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + *Lesser General Public License for more details. + * + *You should have received a copy of the GNU Lesser General Public + *License along with this library; if not, write to the Free Software + *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef STATIC +#include +#endif /* STATIC */ + +#include + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +static long long INIT read_int(unsigned char *ptr, int size) +{ + int i; + long long ret = 0; + + for (i = 0; i < size; i++) + ret = (ret << 8) | ptr[size-i-1]; + return ret; +} + +#define ENDIAN_CONVERT(x) \ + x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) + + +/* Small range coder implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Copyright (c) 1999-2005 Igor Pavlov + */ + +#include + +#define LZMA_IOBUF_SIZE 0x10000 + +struct rc { + int (*fill)(void*, unsigned int); + uint8_t *ptr; + uint8_t *buffer; + uint8_t *buffer_end; + int buffer_size; + uint32_t code; + uint32_t range; + uint32_t bound; +}; + + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +/* Called twice: once at startup and once in rc_normalize() */ +static void INIT rc_read(struct rc *rc) +{ + rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); + if (rc->buffer_size <= 0) + error("unexpected EOF"); + rc->ptr = rc->buffer; + rc->buffer_end = rc->buffer + rc->buffer_size; +} + +/* Called once */ +static inline void INIT rc_init(struct rc *rc, + int (*fill)(void*, unsigned int), + char *buffer, int buffer_size) +{ + rc->fill = fill; + rc->buffer = (uint8_t *)buffer; + rc->buffer_size = buffer_size; + rc->buffer_end = rc->buffer + rc->buffer_size; + rc->ptr = rc->buffer; + + rc->code = 0; + rc->range = 0xFFFFFFFF; +} + +static inline void INIT rc_init_code(struct rc *rc) +{ + int i; + + for (i = 0; i < 5; i++) { + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; + } +} + + +/* Called once. TODO: bb_maybe_free() */ +static inline void INIT rc_free(struct rc *rc) +{ + free(rc->buffer); +} + +/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ +static void INIT rc_do_normalize(struct rc *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} +static inline void INIT rc_normalize(struct rc *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) + rc_do_normalize(rc); +} + +/* Called 9 times */ +/* Why rc_is_bit_0_helper exists? + *Because we want to always expose (rc->code < rc->bound) to optimizer + */ +static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + return rc->bound; +} +static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) +{ + uint32_t t = rc_is_bit_0_helper(rc, p); + return rc->code < t; +} + +/* Called ~10 times, but very small, thus inlined */ +static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) +{ + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +} +static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) +{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +} + +/* Called 4 times in unlzma loop */ +static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) +{ + if (rc_is_bit_0(rc, p)) { + rc_update_bit_0(rc, p); + *symbol *= 2; + return 0; + } else { + rc_update_bit_1(rc, p); + *symbol = *symbol * 2 + 1; + return 1; + } +} + +/* Called once */ +static inline int INIT rc_direct_bit(struct rc *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static inline void INIT +rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + */ + + +struct lzma_header { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} __attribute__ ((packed)) ; + + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LZMA_NUM_POS_BITS_MAX 4 + +#define LZMA_LEN_NUM_LOW_BITS 3 +#define LZMA_LEN_NUM_MID_BITS 3 +#define LZMA_LEN_NUM_HIGH_BITS 8 + +#define LZMA_LEN_CHOICE 0 +#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) +#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) +#define LZMA_LEN_MID (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) +#define LZMA_LEN_HIGH (LZMA_LEN_MID \ + +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) +#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) + +#define LZMA_NUM_STATES 12 +#define LZMA_NUM_LIT_STATES 7 + +#define LZMA_START_POS_MODEL_INDEX 4 +#define LZMA_END_POS_MODEL_INDEX 14 +#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) + +#define LZMA_NUM_POS_SLOT_BITS 6 +#define LZMA_NUM_LEN_TO_POS_STATES 4 + +#define LZMA_NUM_ALIGN_BITS 4 + +#define LZMA_MATCH_MIN_LEN 2 + +#define LZMA_IS_MATCH 0 +#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) +#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) +#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) +#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) +#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_SPEC_POS (LZMA_POS_SLOT \ + +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) +#define LZMA_ALIGN (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) +#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) +#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) +#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) + + +struct writer { + uint8_t *buffer; + uint8_t previous_byte; + size_t buffer_pos; + int bufsize; + size_t global_pos; + int(*flush)(void*, unsigned int); + struct lzma_header *header; +}; + +struct cstate { + int state; + uint32_t rep0, rep1, rep2, rep3; +}; + +static inline size_t INIT get_pos(struct writer *wr) +{ + return + wr->global_pos + wr->buffer_pos; +} + +static inline uint8_t INIT peek_old_byte(struct writer *wr, + uint32_t offs) +{ + if (!wr->flush) { + int32_t pos; + while (offs > wr->header->dict_size) + offs -= wr->header->dict_size; + pos = wr->buffer_pos - offs; + return wr->buffer[pos]; + } else { + uint32_t pos = wr->buffer_pos - offs; + while (pos >= wr->header->dict_size) + pos += wr->header->dict_size; + return wr->buffer[pos]; + } + +} + +static inline void INIT write_byte(struct writer *wr, uint8_t byte) +{ + wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; + if (wr->flush && wr->buffer_pos == wr->header->dict_size) { + wr->buffer_pos = 0; + wr->global_pos += wr->header->dict_size; + wr->flush((char *)wr->buffer, wr->header->dict_size); + } +} + + +static inline void INIT copy_byte(struct writer *wr, uint32_t offs) +{ + write_byte(wr, peek_old_byte(wr, offs)); +} + +static inline void INIT copy_bytes(struct writer *wr, + uint32_t rep0, int len) +{ + do { + copy_byte(wr, rep0); + len--; + } while (len != 0 && wr->buffer_pos < wr->header->dst_size); +} + +static inline void INIT process_bit0(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob, + int lc, uint32_t literal_pos_mask) { + int mi = 1; + rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE + * (((get_pos(wr) & literal_pos_mask) << lc) + + (wr->previous_byte >> (8 - lc)))) + ); + + if (cst->state >= LZMA_NUM_LIT_STATES) { + int match_byte = peek_old_byte(wr, cst->rep0); + do { + int bit; + uint16_t *prob_lit; + + match_byte <<= 1; + bit = match_byte & 0x100; + prob_lit = prob + 0x100 + bit + mi; + if (rc_get_bit(rc, prob_lit, &mi)) { + if (!bit) + break; + } else { + if (bit) + break; + } + } while (mi < 0x100); + } + while (mi < 0x100) { + uint16_t *prob_lit = prob + mi; + rc_get_bit(rc, prob_lit, &mi); + } + write_byte(wr, mi); + if (cst->state < 4) + cst->state = 0; + else if (cst->state < 10) + cst->state -= 3; + else + cst->state -= 6; +} + +static inline void INIT process_bit1(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob) { + int offset; + uint16_t *prob_len; + int num_bits; + int len; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + cst->rep3 = cst->rep2; + cst->rep2 = cst->rep1; + cst->rep1 = cst->rep0; + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G0 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (cst->state << + LZMA_NUM_POS_BITS_MAX) + + pos_state); + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + + cst->state = cst->state < LZMA_NUM_LIT_STATES ? + 9 : 11; + copy_byte(wr, cst->rep0); + return; + } else { + rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G1 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep1; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G2 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep2; + } else { + rc_update_bit_1(rc, prob); + distance = cst->rep3; + cst->rep3 = cst->rep2; + } + cst->rep2 = cst->rep1; + } + cst->rep1 = cst->rep0; + cst->rep0 = distance; + } + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob + LZMA_LEN_CHOICE; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_LOW + + (pos_state << + LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_CHOICE_2; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_MID + + (pos_state << + LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; + } + } + + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (cst->state < 4) { + int pos_slot; + + cst->state += LZMA_NUM_LIT_STATES; + prob = + p + LZMA_POS_SLOT + + ((len < + LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob, + LZMA_NUM_POS_SLOT_BITS, + &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i, mi; + num_bits = (pos_slot >> 1) - 1; + cst->rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + cst->rep0 <<= num_bits; + prob = p + LZMA_SPEC_POS + + cst->rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + cst->rep0 = (cst->rep0 << 1) | + rc_direct_bit(rc); + prob = p + LZMA_ALIGN; + cst->rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } + i = 1; + mi = 1; + while (num_bits--) { + if (rc_get_bit(rc, prob + mi, &mi)) + cst->rep0 |= i; + i <<= 1; + } + } else + cst->rep0 = pos_slot; + if (++(cst->rep0) == 0) + return; + } + + len += LZMA_MATCH_MIN_LEN; + + copy_bytes(wr, cst->rep0, len); +} + + + +STATIC inline int INIT unlzma(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error_fn)(char *x) + ) +{ + struct lzma_header header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_probs; + struct rc rc; + int i, mi; + struct writer wr; + struct cstate cst; + unsigned char *inbuf; + int ret = -1; + + set_error_fn(error_fn); + if (!flush) + in_len -= 4; /* Uncompressed size hack active in pre-boot + environment */ + if (buf) + inbuf = buf; + else + inbuf = malloc(LZMA_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + + cst.state = 0; + cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; + + wr.header = &header; + wr.flush = flush; + wr.global_pos = 0; + wr.previous_byte = 0; + wr.buffer_pos = 0; + + rc_init(&rc, fill, inbuf, in_len); + + for (i = 0; i < sizeof(header); i++) { + if (rc.ptr >= rc.buffer_end) + rc_read(&rc); + ((unsigned char *)&header)[i] = *rc.ptr++; + } + + if (header.pos >= (9 * 5 * 5)) + error("bad header"); + + mi = 0; + lc = header.pos; + while (lc >= 9) { + mi++; + lc -= 9; + } + pb = 0; + lp = mi; + while (lp >= 5) { + pb++; + lp -= 5; + } + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + ENDIAN_CONVERT(header.dict_size); + ENDIAN_CONVERT(header.dst_size); + + if (header.dict_size == 0) + header.dict_size = 1; + + if (output) + wr.buffer = output; + else { + wr.bufsize = MIN(header.dst_size, header.dict_size); + wr.buffer = large_malloc(wr.bufsize); + } + if (wr.buffer == NULL) + goto exit_1; + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); + if (p == 0) + goto exit_2; + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc_init_code(&rc); + + while (get_pos(&wr) < header.dst_size) { + int pos_state = get_pos(&wr) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; + if (rc_is_bit_0(&rc, prob)) + process_bit0(&wr, &rc, &cst, p, pos_state, prob, + lc, literal_pos_mask); + else { + process_bit1(&wr, &rc, &cst, p, pos_state, prob); + if (cst.rep0 == 0) + break; + } + } + + if (posp) + *posp = rc.ptr-rc.buffer; + if (wr.flush) + wr.flush(wr.buffer, wr.buffer_pos); + ret = 0; + large_free(p); +exit_2: + if (!output) + large_free(wr.buffer); +exit_1: + if (!buf) + free(inbuf); +exit_0: + return ret; +} + +#define decompress unlzma diff --git a/lib/zlib_inflate/inflate.h b/lib/zlib_inflate/inflate.h index df8a6c92052d..3d17b3d1b21f 100644 --- a/lib/zlib_inflate/inflate.h +++ b/lib/zlib_inflate/inflate.h @@ -1,3 +1,6 @@ +#ifndef INFLATE_H +#define INFLATE_H + /* inflate.h -- internal inflate state definition * Copyright (C) 1995-2004 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h @@ -105,3 +108,4 @@ struct inflate_state { unsigned short work[288]; /* work area for code table building */ code codes[ENOUGH]; /* space for code tables */ }; +#endif diff --git a/lib/zlib_inflate/inftrees.h b/lib/zlib_inflate/inftrees.h index 5f5219b1240e..b70b4731ac7a 100644 --- a/lib/zlib_inflate/inftrees.h +++ b/lib/zlib_inflate/inftrees.h @@ -1,3 +1,6 @@ +#ifndef INFTREES_H +#define INFTREES_H + /* inftrees.h -- header to use inftrees.c * Copyright (C) 1995-2005 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h @@ -53,3 +56,4 @@ typedef enum { extern int zlib_inflate_table (codetype type, unsigned short *lens, unsigned codes, code **table, unsigned *bits, unsigned short *work); +#endif -- cgit v1.2.3-59-g8ed1b From 30d65dbfe3add7f010a075991dc0bfeaebb7d9e1 Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:17 +0100 Subject: bzip2/lzma: config and initramfs support for bzip2/lzma decompression Impact: New code for initramfs decompression, new features This is the second part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma's decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - support for new compressions (bzip2 and lzma) in initramfs and old-style ramdisk - config dialog for kernel compression (but new kernel compressions not yet supported) Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- drivers/block/Kconfig | 25 +++++++ init/Kconfig | 50 ++++++++++++++ init/do_mounts_rd.c | 182 ++++++++++++++++++-------------------------------- init/initramfs.c | 123 ++++++++++++---------------------- lib/Makefile | 3 +- 5 files changed, 184 insertions(+), 199 deletions(-) (limited to 'lib') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 0344a8a8321d..795594442428 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,6 +358,31 @@ config BLK_DEV_XIP will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). +config RD_BZIP2 + bool "Initial ramdisk compressed using bzip2" + default n + depends on BLK_DEV_INITRD=y + help + Support loading of a bzip2 encoded initial ramdisk or cpio buffer + If unsure, say N. + +config RD_LZMA + bool "Initial ramdisk compressed using lzma" + default n + depends on BLK_DEV_INITRD=y + help + Support loading of a lzma encoded initial ramdisk or cpio buffer + If unsure, say N. + +config RD_GZIP + bool "Initial ramdisk compressed using gzip" + default y + depends on BLK_DEV_INITRD=y + select ZLIB_INFLATE + help + Support loading of a gzip encoded initial ramdisk or cpio buffer. + If unsure, say Y. + config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/init/Kconfig b/init/Kconfig index f6281711166d..df84625b1373 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -101,6 +101,56 @@ config LOCALVERSION_AUTO which is done within the script "scripts/setlocalversion".) +choice + prompt "Kernel compression mode" + default KERNEL_GZIP + help + The linux kernel is a kind of self-extracting executable. + Several compression algorithms are available, which differ + in efficiency, compression and decompression speed. + Compression speed is only relevant when building a kernel. + Decompression speed is relevant at each boot. + + If you have any problems with bzip2 or lzma compressed + kernels, mail me (Alain Knaff) . (An older + version of this functionality (bzip2 only), for 2.4, was + supplied by Christian Ludwig) + + High compression options are mostly useful for users, who + are low on disk space (embedded systems), but for whom ram + size matters less. + + If in doubt, select 'gzip' + +config KERNEL_GZIP + bool "Gzip" + help + The old and tried gzip compression. Its compression ratio is + the poorest among the 3 choices; however its speed (both + compression and decompression) is the fastest. + +config KERNEL_BZIP2 + bool "Bzip2" + help + Its compression ratio and speed is intermediate. + Decompression speed is slowest among the 3. + The kernel size is about 10 per cent smaller with bzip2, + in comparison to gzip. + Bzip2 uses a large amount of memory. For modern kernels + you will need at least 8MB RAM or more for booting. + +config KERNEL_LZMA + bool "LZMA" + help + The most recent compression algorithm. + Its ratio is best, decompression speed is between the other + 2. Compression is slowest. + The kernel size is about 33 per cent smaller with lzma, + in comparison to gzip. + +endchoice + + config SWAP bool "Support for paging of anonymous memory (swap)" depends on MMU && BLOCK diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a7c748fa977a..dcaeb1f90b32 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -10,6 +10,12 @@ #include "do_mounts.h" +#include + +#include +#include +#include + int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ static int __init prompt_ramdisk(char *str) @@ -28,7 +34,7 @@ static int __init ramdisk_start_setup(char *str) } __setup("ramdisk_start=", ramdisk_start_setup); -static int __init crd_load(int in_fd, int out_fd); +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); /* * This routine tries to find a RAM disk image to load, and returns the @@ -44,7 +50,7 @@ static int __init crd_load(int in_fd, int out_fd); * gzip */ static int __init -identify_ramdisk_image(int fd, int start_block) +identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) { const int size = 512; struct minix_super_block *minixsb; @@ -70,6 +76,7 @@ identify_ramdisk_image(int fd, int start_block) sys_lseek(fd, start_block * BLOCK_SIZE, 0); sys_read(fd, buf, size); +#ifdef CONFIG_RD_GZIP /* * If it matches the gzip magic numbers, return 0 */ @@ -77,9 +84,39 @@ identify_ramdisk_image(int fd, int start_block) printk(KERN_NOTICE "RAMDISK: Compressed image found at block %d\n", start_block); + *decompressor = gunzip; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_BZIP2 + /* + * If it matches the bzip2 magic numbers, return -1 + */ + if (buf[0] == 0x42 && (buf[1] == 0x5a)) { + printk(KERN_NOTICE + "RAMDISK: Bzipped image found at block %d\n", + start_block); + *decompressor = bunzip2; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_LZMA + /* + * If it matches the lzma magic numbers, return -1 + */ + if (buf[0] == 0x5d && (buf[1] == 0x00)) { + printk(KERN_NOTICE + "RAMDISK: Lzma image found at block %d\n", + start_block); + *decompressor = unlzma; nblocks = 0; goto done; } +#endif /* romfs is at block zero too */ if (romfsb->word0 == ROMSB_WORD0 && @@ -143,6 +180,7 @@ int __init rd_load_image(char *from) int nblocks, i, disk; char *buf = NULL; unsigned short rotate = 0; + decompress_fn decompressor = NULL; #if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) char rotator[4] = { '|' , '/' , '-' , '\\' }; #endif @@ -155,12 +193,12 @@ int __init rd_load_image(char *from) if (in_fd < 0) goto noclose_input; - nblocks = identify_ramdisk_image(in_fd, rd_image_start); + nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); if (nblocks < 0) goto done; if (nblocks == 0) { - if (crd_load(in_fd, out_fd) == 0) + if (crd_load(in_fd, out_fd, decompressor) == 0) goto successful_load; goto done; } @@ -259,138 +297,48 @@ int __init rd_load_disk(int n) return rd_load_image("/dev/root"); } -/* - * gzip declarations - */ - -#define OF(args) args - -#ifndef memzero -#define memzero(s, n) memset ((s), 0, (n)) -#endif - -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - -#define INBUFSIZ 4096 -#define WSIZE 0x8000 /* window size--must be a power of two, and */ - /* at least 32K for zip's deflate method */ - -static uch *inbuf; -static uch *window; - -static unsigned insize; /* valid bytes in inbuf */ -static unsigned inptr; /* index of next byte to be processed in inbuf */ -static unsigned outcnt; /* bytes in output buffer */ static int exit_code; -static int unzip_error; -static long bytes_out; +static int decompress_error; static int crd_infd, crd_outfd; -#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) - -/* Diagnostic functions (stubbed out) */ -#define Assert(cond,msg) -#define Trace(x) -#define Tracev(x) -#define Tracevv(x) -#define Tracec(c,x) -#define Tracecv(c,x) - -#define STATIC static -#define INIT __init - -static int __init fill_inbuf(void); -static void __init flush_window(void); -static void __init error(char *m); - -#define NO_INFLATE_MALLOC - -#include "../lib/inflate.c" - -/* =========================================================================== - * Fill the input buffer. This is called only when the buffer is empty - * and at least one byte is really needed. - * Returning -1 does not guarantee that gunzip() will ever return. - */ -static int __init fill_inbuf(void) +static int __init compr_fill(void *buf, unsigned int len) { - if (exit_code) return -1; - - insize = sys_read(crd_infd, inbuf, INBUFSIZ); - if (insize == 0) { - error("RAMDISK: ran out of compressed data"); - return -1; - } - - inptr = 1; - - return inbuf[0]; + int r = sys_read(crd_infd, buf, len); + if (r < 0) + printk(KERN_ERR "RAMDISK: error while reading compressed data"); + else if (r == 0) + printk(KERN_ERR "RAMDISK: EOF while reading compressed data"); + return r; } -/* =========================================================================== - * Write the output window window[0..outcnt-1] and update crc and bytes_out. - * (Used for the decompressed data only.) - */ -static void __init flush_window(void) +static int __init compr_flush(void *window, unsigned int outcnt) { - ulg c = crc; /* temporary variable */ - unsigned n, written; - uch *in, ch; - - written = sys_write(crd_outfd, window, outcnt); - if (written != outcnt && unzip_error == 0) { - printk(KERN_ERR "RAMDISK: incomplete write (%d != %d) %ld\n", - written, outcnt, bytes_out); - unzip_error = 1; - } - in = window; - for (n = 0; n < outcnt; n++) { - ch = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; + int written = sys_write(crd_outfd, window, outcnt); + if (written != outcnt) { + if (decompress_error == 0) + printk(KERN_ERR + "RAMDISK: incomplete write (%d != %d)\n", + written, outcnt); + decompress_error = 1; + return -1; + } + return outcnt; } static void __init error(char *x) { printk(KERN_ERR "%s\n", x); exit_code = 1; - unzip_error = 1; + decompress_error = 1; } -static int __init crd_load(int in_fd, int out_fd) +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) { int result; - - insize = 0; /* valid bytes in inbuf */ - inptr = 0; /* index of next byte to be processed in inbuf */ - outcnt = 0; /* bytes in output buffer */ - exit_code = 0; - bytes_out = 0; - crc = (ulg)0xffffffffL; /* shift register contents */ - crd_infd = in_fd; crd_outfd = out_fd; - inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); - if (!inbuf) { - printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n"); - return -1; - } - window = kmalloc(WSIZE, GFP_KERNEL); - if (!window) { - printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n"); - kfree(inbuf); - return -1; - } - makecrc(); - result = gunzip(); - if (unzip_error) + result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); + if (decompress_error) result = 1; - kfree(inbuf); - kfree(window); return result; } diff --git a/init/initramfs.c b/init/initramfs.c index 4f5ba75aaa7c..40bd4fb95788 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -389,11 +389,14 @@ static int __init write_buffer(char *buf, unsigned len) return len - count; } -static void __init flush_buffer(char *buf, unsigned len) + +static int __init flush_buffer(void *bufv, unsigned len) { + char *buf = (char *) bufv; int written; + int origLen = len; if (message) - return; + return -1; while ((written = write_buffer(buf, len)) < len && !message) { char c = buf[written]; if (c == '0') { @@ -407,73 +410,14 @@ static void __init flush_buffer(char *buf, unsigned len) } else error("junk in compressed archive"); } + return origLen; } -/* - * gzip declarations - */ - -#define OF(args) args - -#ifndef memzero -#define memzero(s, n) memset ((s), 0, (n)) -#endif - -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - -#define WSIZE 0x8000 /* window size--must be a power of two, and */ - /* at least 32K for zip's deflate method */ - -static uch *inbuf; -static uch *window; - -static unsigned insize; /* valid bytes in inbuf */ -static unsigned inptr; /* index of next byte to be processed in inbuf */ -static unsigned outcnt; /* bytes in output buffer */ -static long bytes_out; - -#define get_byte() (inptr < insize ? inbuf[inptr++] : -1) - -/* Diagnostic functions (stubbed out) */ -#define Assert(cond,msg) -#define Trace(x) -#define Tracev(x) -#define Tracevv(x) -#define Tracec(c,x) -#define Tracecv(c,x) - -#define STATIC static -#define INIT __init - -static void __init flush_window(void); -static void __init error(char *m); - -#define NO_INFLATE_MALLOC +static unsigned my_inptr; /* index of next byte to be processed in inbuf */ -#include "../lib/inflate.c" - -/* =========================================================================== - * Write the output window window[0..outcnt-1] and update crc and bytes_out. - * (Used for the decompressed data only.) - */ -static void __init flush_window(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, ch; - - flush_buffer(window, outcnt); - in = window; - for (n = 0; n < outcnt; n++) { - ch = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; -} +#include +#include +#include static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { @@ -482,9 +426,10 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) header_buf = kmalloc(110, GFP_KERNEL); symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); - window = kmalloc(WSIZE, GFP_KERNEL); - if (!window || !header_buf || !symlink_buf || !name_buf) + + if (!header_buf || !symlink_buf || !name_buf) panic("can't allocate buffers"); + state = Start; this_header = 0; message = NULL; @@ -504,22 +449,38 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; - insize = len; - inbuf = buf; - inptr = 0; - outcnt = 0; /* bytes in output buffer */ - bytes_out = 0; - crc = (ulg)0xffffffffL; /* shift register contents */ - makecrc(); - gunzip(); + if (!gunzip(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) + goto ok; + +#ifdef CONFIG_RD_BZIP2 + message = NULL; /* Zero out message, or else cpio will + think an error has already occured */ + if (!bunzip2(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) { + goto ok; + } +#endif + +#ifdef CONFIG_RD_LZMA + message = NULL; /* Zero out message, or else cpio will + think an error has already occured */ + if (!unlzma(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) { + goto ok; + } +#endif +ok: if (state != Reset) - error("junk in gzipped archive"); - this_header = saved_offset + inptr; - buf += inptr; - len -= inptr; + error("junk in compressed archive"); + this_header = saved_offset + my_inptr; + buf += my_inptr; + len -= my_inptr; } dir_utime(); - kfree(window); kfree(name_buf); kfree(symlink_buf); kfree(header_buf); diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded27..e2a21d5a264f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o \ + decompress_inflate.o decompress_bunzip2.o decompress_unlzma.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -- cgit v1.2.3-59-g8ed1b From c8531ab343dec88ed8005e403b1b304c710b7494 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 5 Jan 2009 13:48:31 -0800 Subject: bzip2/lzma: proper Kconfig dependencies for the ramdisk options Impact: Partial resolution of build failure Make all the compression algorithms properly configurable, and make sure the ramdisk options pull in the proper compression algorithms, as they should. Signed-off-by: H. Peter Anvin --- drivers/block/Kconfig | 20 +++++++++++--------- lib/Kconfig | 13 +++++++++++++ lib/Makefile | 7 +++++-- 3 files changed, 29 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 795594442428..cc9fa69c1ce4 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,10 +358,20 @@ config BLK_DEV_XIP will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). +config RD_GZIP + bool "Initial ramdisk compressed using gzip" + default y + depends on BLK_DEV_INITRD=y + select DECOMPRESS_GZIP + help + Support loading of a gzip encoded initial ramdisk or cpio buffer. + If unsure, say Y. + config RD_BZIP2 bool "Initial ramdisk compressed using bzip2" default n depends on BLK_DEV_INITRD=y + select DECOMPRESS_BZIP2 help Support loading of a bzip2 encoded initial ramdisk or cpio buffer If unsure, say N. @@ -370,19 +380,11 @@ config RD_LZMA bool "Initial ramdisk compressed using lzma" default n depends on BLK_DEV_INITRD=y + select DECOMPRESS_LZMA help Support loading of a lzma encoded initial ramdisk or cpio buffer If unsure, say N. -config RD_GZIP - bool "Initial ramdisk compressed using gzip" - default y - depends on BLK_DEV_INITRD=y - select ZLIB_INFLATE - help - Support loading of a gzip encoded initial ramdisk or cpio buffer. - If unsure, say Y. - config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/lib/Kconfig b/lib/Kconfig index 03c2c24b9083..e37f061fd32a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -97,6 +97,19 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +# +# These all provide a common interface (hence the apparent duplication with +# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) +# +config DECOMPRESS_GZIP + tristate + +config DECOMPRESS_BZIP2 + tristate + +config DECOMPRESS_LZMA + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Makefile b/lib/Makefile index e2a21d5a264f..d9ac5a414fa7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,8 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o \ - decompress_inflate.o decompress_bunzip2.o decompress_unlzma.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -66,6 +65,10 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o +obj-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o +obj-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o + obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o -- cgit v1.2.3-59-g8ed1b From 160c1d8e40866edfeae7d68816b7005d70acf391 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:59:02 +0900 Subject: x86, ia64: convert to use generic dma_map_ops struct This converts X86 and IA64 to use include/linux/dma-mapping.h. It's a bit large but pretty boring. The major change for X86 is converting 'int dir' to 'enum dma_data_direction dir' in DMA mapping operations. The major changes for IA64 is using map_page and unmap_page instead of map_single and unmap_single. Signed-off-by: FUJITA Tomonori Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- arch/ia64/dig/Makefile | 4 +- arch/ia64/dig/dig_vtd_iommu.c | 77 ------------------- arch/ia64/hp/common/hwsw_iommu.c | 6 +- arch/ia64/hp/common/sba_iommu.c | 46 ++++++++---- arch/ia64/include/asm/dma-mapping.h | 107 ++++++++------------------ arch/ia64/include/asm/machvec.h | 12 +-- arch/ia64/kernel/dma-mapping.c | 4 +- arch/ia64/kernel/machvec.c | 8 +- arch/ia64/kernel/pci-dma.c | 49 +++++++----- arch/ia64/kernel/pci-swiotlb.c | 32 +++++--- arch/ia64/sn/pci/pci_dma.c | 58 +++++++------- arch/x86/include/asm/device.h | 2 +- arch/x86/include/asm/dma-mapping.h | 146 +++++++++++++----------------------- arch/x86/include/asm/iommu.h | 2 +- arch/x86/kernel/amd_iommu.c | 8 +- arch/x86/kernel/pci-calgary_64.c | 15 ++-- arch/x86/kernel/pci-dma.c | 4 +- arch/x86/kernel/pci-gart_64.c | 14 ++-- arch/x86/kernel/pci-nommu.c | 5 +- arch/x86/kernel/pci-swiotlb_64.c | 6 +- drivers/pci/intel-iommu.c | 9 +-- include/linux/intel-iommu.h | 6 +- include/linux/swiotlb.h | 18 +++-- lib/swiotlb.c | 18 +++-- 24 files changed, 278 insertions(+), 378 deletions(-) delete mode 100644 arch/ia64/dig/dig_vtd_iommu.c (limited to 'lib') diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile index 5c0283830bd6..2f7caddf093e 100644 --- a/arch/ia64/dig/Makefile +++ b/arch/ia64/dig/Makefile @@ -7,8 +7,8 @@ obj-y := setup.o ifeq ($(CONFIG_DMAR), y) -obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o dig_vtd_iommu.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o else obj-$(CONFIG_IA64_GENERIC) += machvec.o endif -obj-$(CONFIG_IA64_DIG_VTD) += dig_vtd_iommu.o + diff --git a/arch/ia64/dig/dig_vtd_iommu.c b/arch/ia64/dig/dig_vtd_iommu.c deleted file mode 100644 index fdb8ba9f4992..000000000000 --- a/arch/ia64/dig/dig_vtd_iommu.c +++ /dev/null @@ -1,77 +0,0 @@ -#include -#include -#include -#include -#include - -void * -vtd_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flags) -{ - return intel_alloc_coherent(dev, size, dma_handle, flags); -} -EXPORT_SYMBOL_GPL(vtd_alloc_coherent); - -void -vtd_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - intel_free_coherent(dev, size, vaddr, dma_handle); -} -EXPORT_SYMBOL_GPL(vtd_free_coherent); - -dma_addr_t -vtd_map_single_attrs(struct device *dev, void *addr, size_t size, - int dir, struct dma_attrs *attrs) -{ - return intel_map_single(dev, (phys_addr_t)addr, size, dir); -} -EXPORT_SYMBOL_GPL(vtd_map_single_attrs); - -void -vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, - int dir, struct dma_attrs *attrs) -{ - intel_unmap_single(dev, iova, size, dir); -} -EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs); - -int -vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, - int dir, struct dma_attrs *attrs) -{ - return intel_map_sg(dev, sglist, nents, dir); -} -EXPORT_SYMBOL_GPL(vtd_map_sg_attrs); - -void -vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, int dir, struct dma_attrs *attrs) -{ - intel_unmap_sg(dev, sglist, nents, dir); -} -EXPORT_SYMBOL_GPL(vtd_unmap_sg_attrs); - -int -vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} -EXPORT_SYMBOL_GPL(vtd_dma_mapping_error); - -extern int iommu_dma_supported(struct device *dev, u64 mask); - -struct dma_mapping_ops vtd_dma_ops = { - .alloc_coherent = vtd_alloc_coherent, - .free_coherent = vtd_free_coherent, - .map_single_attrs = vtd_map_single_attrs, - .unmap_single_attrs = vtd_unmap_single_attrs, - .map_sg_attrs = vtd_map_sg_attrs, - .unmap_sg_attrs = vtd_unmap_sg_attrs, - .sync_single_for_cpu = machvec_dma_sync_single, - .sync_sg_for_cpu = machvec_dma_sync_sg, - .sync_single_for_device = machvec_dma_sync_single, - .sync_sg_for_device = machvec_dma_sync_sg, - .dma_supported_op = iommu_dma_supported, - .mapping_error = vtd_dma_mapping_error, -}; diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index e5bbeba77810..e4a80d82e3d8 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -17,7 +17,7 @@ #include #include -extern struct dma_mapping_ops sba_dma_ops, swiotlb_dma_ops; +extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; /* swiotlb declarations & definitions: */ extern int swiotlb_late_init_with_default_size (size_t size); @@ -30,10 +30,10 @@ extern int swiotlb_late_init_with_default_size (size_t size); static inline int use_swiotlb(struct device *dev) { return dev && dev->dma_mask && - !sba_dma_ops.dma_supported_op(dev, *dev->dma_mask); + !sba_dma_ops.dma_supported(dev, *dev->dma_mask); } -struct dma_mapping_ops *hwsw_dma_get_ops(struct device *dev) +struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) { if (use_swiotlb(dev)) return &swiotlb_dma_ops; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 29e7206f3dc6..129b62eb39e5 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -909,11 +909,13 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) * * See Documentation/DMA-mapping.txt */ -static dma_addr_t -sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, - struct dma_attrs *attrs) +static dma_addr_t sba_map_page(struct device *dev, struct page *page, + unsigned long poff, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { struct ioc *ioc; + void *addr = page_address(page) + poff; dma_addr_t iovp; dma_addr_t offset; u64 *pdir_start; @@ -992,6 +994,14 @@ sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, return SBA_IOVA(ioc, iovp, offset); } +static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return sba_map_page(dev, virt_to_page(addr), + (unsigned long)addr & ~PAGE_MASK, size, dir, attrs); +} + #ifdef ENABLE_MARK_CLEAN static SBA_INLINE void sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) @@ -1026,8 +1036,8 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) * * See Documentation/DMA-mapping.txt */ -static void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, - int dir, struct dma_attrs *attrs) +static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct ioc *ioc; #if DELAYED_RESOURCE_CNT > 0 @@ -1095,6 +1105,12 @@ static void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t s #endif /* DELAYED_RESOURCE_CNT == 0 */ } +void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + sba_unmap_page(dev, iova, size, dir, attrs); +} + /** * sba_alloc_coherent - allocate/map shared mem for DMA * @dev: instance of PCI owned by the driver that's asking. @@ -1423,7 +1439,8 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, * See Documentation/DMA-mapping.txt */ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, int dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -1514,7 +1531,8 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, * See Documentation/DMA-mapping.txt */ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, int dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { #ifdef ASSERT_PDIR_SANITY struct ioc *ioc; @@ -2062,7 +2080,7 @@ static struct acpi_driver acpi_sba_ioc_driver = { }, }; -extern struct dma_mapping_ops swiotlb_dma_ops; +extern struct dma_map_ops swiotlb_dma_ops; static int __init sba_init(void) @@ -2176,18 +2194,18 @@ sba_page_override(char *str) __setup("sbapagesize=",sba_page_override); -struct dma_mapping_ops sba_dma_ops = { +struct dma_map_ops sba_dma_ops = { .alloc_coherent = sba_alloc_coherent, .free_coherent = sba_free_coherent, - .map_single_attrs = sba_map_single_attrs, - .unmap_single_attrs = sba_unmap_single_attrs, - .map_sg_attrs = sba_map_sg_attrs, - .unmap_sg_attrs = sba_unmap_sg_attrs, + .map_page = sba_map_page, + .unmap_page = sba_unmap_page, + .map_sg = sba_map_sg_attrs, + .unmap_sg = sba_unmap_sg_attrs, .sync_single_for_cpu = machvec_dma_sync_single, .sync_sg_for_cpu = machvec_dma_sync_sg, .sync_single_for_device = machvec_dma_sync_single, .sync_sg_for_device = machvec_dma_sync_sg, - .dma_supported_op = sba_dma_supported, + .dma_supported = sba_dma_supported, .mapping_error = sba_dma_mapping_error, }; diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index bac3159379f7..d6230f514536 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -9,73 +9,21 @@ #include #include -struct dma_mapping_ops { - int (*mapping_error)(struct device *dev, - dma_addr_t dma_addr); - void* (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); - void (*free_coherent)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr, - size_t size, int direction); - void (*unmap_single)(struct device *dev, dma_addr_t addr, - size_t size, int direction); - dma_addr_t (*map_single_attrs)(struct device *dev, void *cpu_addr, - size_t size, int direction, - struct dma_attrs *attrs); - void (*unmap_single_attrs)(struct device *dev, - dma_addr_t dma_addr, - size_t size, int direction, - struct dma_attrs *attrs); - void (*sync_single_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_for_device)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_range_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_single_range_for_device)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_sg_for_cpu)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - void (*sync_sg_for_device)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - int (*map_sg)(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); - void (*unmap_sg)(struct device *hwdev, - struct scatterlist *sg, int nents, - int direction); - int (*map_sg_attrs)(struct device *dev, - struct scatterlist *sg, int nents, - int direction, struct dma_attrs *attrs); - void (*unmap_sg_attrs)(struct device *dev, - struct scatterlist *sg, int nents, - int direction, - struct dma_attrs *attrs); - int (*dma_supported_op)(struct device *hwdev, u64 mask); - int is_phys; -}; - -extern struct dma_mapping_ops *dma_ops; +extern struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *daddr, gfp_t gfp) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); return ops->alloc_coherent(dev, size, daddr, gfp | GFP_DMA); } static inline void dma_free_coherent(struct device *dev, size_t size, void *caddr, dma_addr_t daddr) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->free_coherent(dev, size, caddr, daddr); } @@ -87,8 +35,10 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - return ops->map_single_attrs(dev, caddr, size, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->map_page(dev, virt_to_page(caddr), + (unsigned long)caddr & ~PAGE_MASK, size, + dir, attrs); } static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t daddr, @@ -96,8 +46,8 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t daddr, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - ops->unmap_single_attrs(dev, daddr, size, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + ops->unmap_page(dev, daddr, size, dir, attrs); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) @@ -107,8 +57,8 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - return ops->map_sg_attrs(dev, sgl, nents, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->map_sg(dev, sgl, nents, dir, attrs); } static inline void dma_unmap_sg_attrs(struct device *dev, @@ -116,8 +66,8 @@ static inline void dma_unmap_sg_attrs(struct device *dev, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - ops->unmap_sg_attrs(dev, sgl, nents, dir, attrs); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + ops->unmap_sg(dev, sgl, nents, dir, attrs); } #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) @@ -127,7 +77,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t daddr, size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_single_for_cpu(dev, daddr, size, dir); } @@ -135,7 +85,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_sg_for_cpu(dev, sgl, nents, dir); } @@ -144,7 +94,7 @@ static inline void dma_sync_single_for_device(struct device *dev, size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_single_for_device(dev, daddr, size, dir); } @@ -153,20 +103,29 @@ static inline void dma_sync_sg_for_device(struct device *dev, int nents, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); ops->sync_sg_for_device(dev, sgl, nents, dir); } static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); return ops->mapping_error(dev, daddr); } -#define dma_map_page(dev, pg, off, size, dir) \ - dma_map_single(dev, page_address(pg) + (off), (size), (dir)) -#define dma_unmap_page(dev, dma_addr, size, dir) \ - dma_unmap_single(dev, dma_addr, size, dir) +static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->map_page(dev, page, offset, size, dir, NULL); +} + +static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + dma_unmap_single(dev, addr, size, dir); +} /* * Rest of this file is part of the "Advanced DMA API". Use at your own risk. @@ -180,8 +139,8 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) static inline int dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); - return ops->dma_supported_op(dev, mask); + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->dma_supported(dev, mask); } static inline int diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 95e1708fa4e3..e8442c7e4cc8 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -11,7 +11,6 @@ #define _ASM_IA64_MACHVEC_H #include -#include /* forward declarations: */ struct device; @@ -24,6 +23,7 @@ struct task_struct; struct pci_dev; struct msi_desc; struct dma_attrs; +enum dma_data_direction; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_cpu_init_t (void); @@ -45,7 +45,7 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); -typedef struct dma_mapping_ops *ia64_mv_dma_get_ops(struct device *); +typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* * WARNING: The legacy I/O space is _architected_. Platforms are @@ -97,8 +97,10 @@ machvec_noop_bus (struct pci_bus *bus) extern void machvec_setup (char **); extern void machvec_timer_interrupt (int, void *); -extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); -extern void machvec_dma_sync_sg (struct device *, struct scatterlist *, int, int); +extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t, + enum dma_data_direction); +extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction); extern void machvec_tlb_migrate_finish (struct mm_struct *); # if defined (CONFIG_IA64_HP_SIM) @@ -250,7 +252,7 @@ extern void machvec_init_from_cmdline(const char *cmdline); # endif /* CONFIG_IA64_GENERIC */ extern void swiotlb_dma_init(void); -extern struct dma_mapping_ops *dma_get_ops(struct device *); +extern struct dma_map_ops *dma_get_ops(struct device *); /* * Define default versions so we can extend machvec for new platforms without having diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 427f69617226..7060e13fa421 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,9 +1,9 @@ #include -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -struct dma_mapping_ops *dma_get_ops(struct device *dev) +struct dma_map_ops *dma_get_ops(struct device *dev) { return dma_ops; } diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 7ccb228ceedc..d41a40ef80c0 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -1,5 +1,5 @@ #include - +#include #include #include @@ -75,14 +75,16 @@ machvec_timer_interrupt (int irq, void *dev_id) EXPORT_SYMBOL(machvec_timer_interrupt); void -machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir) +machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) { mb(); } EXPORT_SYMBOL(machvec_dma_sync_single); void -machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir) +machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n, + enum dma_data_direction dir) { mb(); } diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index 640669eba5d4..b30209ec8c6e 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -41,21 +41,7 @@ struct device fallback_dev = { .dma_mask = &fallback_dev.coherent_dma_mask, }; -extern struct dma_mapping_ops vtd_dma_ops; - -void __init pci_iommu_alloc(void) -{ - dma_ops = &vtd_dma_ops; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} +extern struct dma_map_ops intel_dma_ops; static int __init pci_iommu_init(void) { @@ -81,10 +67,10 @@ iommu_dma_init(void) int iommu_dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = platform_dma_get_ops(dev); + struct dma_map_ops *ops = platform_dma_get_ops(dev); - if (ops->dma_supported_op) - return ops->dma_supported_op(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. @@ -113,4 +99,31 @@ int iommu_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(iommu_dma_supported); +static int vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +void __init pci_iommu_alloc(void) +{ + dma_ops = &intel_dma_ops; + + dma_ops->sync_single_for_cpu = machvec_dma_sync_single; + dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; + dma_ops->sync_single_for_device = machvec_dma_sync_single; + dma_ops->sync_sg_for_device = machvec_dma_sync_sg; + dma_ops->dma_supported = iommu_dma_supported; + dma_ops->mapping_error = vtd_dma_mapping_error; + + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + #endif diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 9f172c864377..6bf8f66786bd 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -16,24 +16,36 @@ EXPORT_SYMBOL(swiotlb); /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly; -struct dma_mapping_ops swiotlb_dma_ops = { +static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return swiotlb_map_single_attrs(dev, page_address(page) + offset, size, + dir, attrs); +} + +static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + swiotlb_unmap_single_attrs(dev, dma_handle, size, dir, attrs); +} + +struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, - .unmap_single = swiotlb_unmap_single, - .map_single_attrs = swiotlb_map_single_attrs, - .unmap_single_attrs = swiotlb_unmap_single_attrs, - .map_sg_attrs = swiotlb_map_sg_attrs, - .unmap_sg_attrs = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg, - .unmap_sg = swiotlb_unmap_sg, - .dma_supported_op = swiotlb_dma_supported, + .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index efdd69490009..9c788f9cedfd 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -171,10 +170,12 @@ static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr * TODO: simplify our interface; * figure out how to save dmamap handle so can use two step. */ -static dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr, - size_t size, int direction, - struct dma_attrs *attrs) +static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { + void *cpu_addr = page_address(page) + offset; dma_addr_t dma_addr; unsigned long phys_addr; struct pci_dev *pdev = to_pci_dev(dev); @@ -212,20 +213,20 @@ static dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr, * by @dma_handle into the coherence domain. On SN, we're always cache * coherent, so we just need to free any ATEs associated with this mapping. */ -static void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction, - struct dma_attrs *attrs) +static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); - provider->dma_unmap(pdev, dma_addr, direction); + provider->dma_unmap(pdev, dma_addr, dir); } /** - * sn_dma_unmap_sg_attrs - unmap a DMA scatterlist + * sn_dma_unmap_sg - unmap a DMA scatterlist * @dev: device to unmap * @sg: scatterlist to unmap * @nhwentries: number of scatterlist entries @@ -234,9 +235,9 @@ static void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, * * Unmap a set of streaming mode DMA translations. */ -static void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nhwentries, int direction, - struct dma_attrs *attrs) +static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(dev); @@ -246,14 +247,14 @@ static void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, BUG_ON(dev->bus != &pci_bus_type); for_each_sg(sgl, sg, nhwentries, i) { - provider->dma_unmap(pdev, sg->dma_address, direction); + provider->dma_unmap(pdev, sg->dma_address, dir); sg->dma_address = (dma_addr_t) NULL; sg->dma_length = 0; } } /** - * sn_dma_map_sg_attrs - map a scatterlist for DMA + * sn_dma_map_sg - map a scatterlist for DMA * @dev: device to map for * @sg: scatterlist to map * @nhwentries: number of entries @@ -267,8 +268,9 @@ static void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, * * Maps each entry of @sg for DMA. */ -static int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nhwentries, int direction, struct dma_attrs *attrs) +static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long phys_addr; struct scatterlist *saved_sg = sgl, *sg; @@ -305,8 +307,7 @@ static int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, * Free any successfully allocated entries. */ if (i > 0) - sn_dma_unmap_sg_attrs(dev, saved_sg, i, - direction, attrs); + sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs); return 0; } @@ -317,25 +318,26 @@ static int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, } static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, + enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { BUG_ON(dev->bus != &pci_bus_type); } @@ -455,19 +457,19 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) return ret; } -static struct dma_mapping_ops sn_dma_ops = { +static struct dma_map_ops sn_dma_ops = { .alloc_coherent = sn_dma_alloc_coherent, .free_coherent = sn_dma_free_coherent, - .map_single_attrs = sn_dma_map_single_attrs, - .unmap_single_attrs = sn_dma_unmap_single_attrs, - .map_sg_attrs = sn_dma_map_sg_attrs, - .unmap_sg_attrs = sn_dma_unmap_sg_attrs, + .map_page = sn_dma_map_page, + .unmap_page = sn_dma_unmap_page, + .map_sg = sn_dma_map_sg, + .unmap_sg = sn_dma_unmap_sg, .sync_single_for_cpu = sn_dma_sync_single_for_cpu, .sync_sg_for_cpu = sn_dma_sync_sg_for_cpu, .sync_single_for_device = sn_dma_sync_single_for_device, .sync_sg_for_device = sn_dma_sync_sg_for_device, .mapping_error = sn_dma_mapping_error, - .dma_supported_op = sn_dma_supported, + .dma_supported = sn_dma_supported, }; void sn_dma_init(void) diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 3c034f48fdb0..4994a20acbcb 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,7 +6,7 @@ struct dev_archdata { void *acpi_handle; #endif #ifdef CONFIG_X86_64 -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops; #endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index b81f82268a16..5a347805a6c7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -17,50 +17,9 @@ extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; -struct dma_mapping_ops { - int (*mapping_error)(struct device *dev, - dma_addr_t dma_addr); - void* (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); - void (*free_coherent)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - void (*sync_single_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_for_device)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_range_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_single_range_for_device)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_sg_for_cpu)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - void (*sync_sg_for_device)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - int (*map_sg)(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); - void (*unmap_sg)(struct device *hwdev, - struct scatterlist *sg, int nents, - int direction); - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs); - void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs); - int (*dma_supported)(struct device *hwdev, u64 mask); - int is_phys; -}; - -extern struct dma_mapping_ops *dma_ops; - -static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) +extern struct dma_map_ops *dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) { #ifdef CONFIG_X86_32 return dma_ops; @@ -75,7 +34,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); @@ -94,138 +53,139 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); return ops->map_page(hwdev, virt_to_page(ptr), (unsigned long)ptr & ~PAGE_MASK, size, - direction, NULL); + dir, NULL); } static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) - ops->unmap_page(dev, addr, size, direction, NULL); + ops->unmap_page(dev, addr, size, dir, NULL); } static inline int dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) + int nents, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); - return ops->map_sg(hwdev, sg, nents, direction); + BUG_ON(!valid_dma_direction(dir)); + return ops->map_sg(hwdev, sg, nents, dir, NULL); } static inline void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_sg) - ops->unmap_sg(hwdev, sg, nents, direction); + ops->unmap_sg(hwdev, sg, nents, dir, NULL); } static inline void dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); + ops->sync_single_for_cpu(hwdev, dma_handle, size, dir); flush_write_buffers(); } static inline void dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_device) - ops->sync_single_for_device(hwdev, dma_handle, size, direction); + ops->sync_single_for_device(hwdev, dma_handle, size, dir); flush_write_buffers(); } static inline void dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - unsigned long offset, size_t size, int direction) + unsigned long offset, size_t size, + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_range_for_cpu) ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, direction); + size, dir); flush_write_buffers(); } static inline void dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_range_for_device) ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); + offset, size, dir); flush_write_buffers(); } static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_cpu) - ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + ops->sync_sg_for_cpu(hwdev, sg, nelems, dir); flush_write_buffers(); } static inline void dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(hwdev); + struct dma_map_ops *ops = get_dma_ops(hwdev); - BUG_ON(!valid_dma_direction(direction)); + BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_device) - ops->sync_sg_for_device(hwdev, sg, nelems, direction); + ops->sync_sg_for_device(hwdev, sg, nelems, dir); flush_write_buffers(); } static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, - int direction) + enum dma_data_direction dir) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!valid_dma_direction(direction)); - return ops->map_page(dev, page, offset, size, direction, NULL); + BUG_ON(!valid_dma_direction(dir)); + return ops->map_page(dev, page, offset, size, dir, NULL); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction) + size_t size, enum dma_data_direction dir) { - dma_unmap_single(dev, addr, size, direction); + dma_unmap_single(dev, addr, size, dir); } static inline void @@ -271,7 +231,7 @@ static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); void *memory; gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -297,7 +257,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, static inline void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); WARN_ON(irqs_disabled()); /* for portability */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index a6ee9e6f530f..af326a2975b5 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -3,7 +3,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); -extern struct dma_mapping_ops nommu_dma_ops; +extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a5dedb690a9a..008e522b9536 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1394,7 +1394,8 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, * lists). */ static int map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long flags; struct amd_iommu *iommu; @@ -1461,7 +1462,8 @@ unmap: * lists). */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { unsigned long flags; struct amd_iommu *iommu; @@ -1648,7 +1650,7 @@ static void prealloc_protection_domains(void) } } -static struct dma_mapping_ops amd_iommu_dma_ops = { +static struct dma_map_ops amd_iommu_dma_ops = { .alloc_coherent = alloc_coherent, .free_coherent = free_coherent, .map_page = map_page, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 756138b604e1..755c21e906f3 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -380,8 +380,9 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) return tbl; } -static void calgary_unmap_sg(struct device *dev, - struct scatterlist *sglist, int nelems, int direction) +static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems,enum dma_data_direction dir, + struct dma_attrs *attrs) { struct iommu_table *tbl = find_iommu_table(dev); struct scatterlist *s; @@ -404,7 +405,8 @@ static void calgary_unmap_sg(struct device *dev, } static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - int nelems, int direction) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct iommu_table *tbl = find_iommu_table(dev); struct scatterlist *s; @@ -429,15 +431,14 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, s->dma_address = (entry << PAGE_SHIFT) | s->offset; /* insert into HW table */ - tce_build(tbl, entry, npages, vaddr & PAGE_MASK, - direction); + tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir); s->dma_length = s->length; } return nelems; error: - calgary_unmap_sg(dev, sg, nelems, direction); + calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { sg->dma_address = bad_dma_address; sg->dma_length = 0; @@ -518,7 +519,7 @@ static void calgary_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } -static struct dma_mapping_ops calgary_dma_ops = { +static struct dma_map_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .free_coherent = calgary_free_coherent, .map_sg = calgary_map_sg, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 19a1044a0cd9..0d75c129b18a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -12,7 +12,7 @@ static int forbid_dac __read_mostly; -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -224,7 +224,7 @@ early_param("iommu", iommu_setup); int dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9c557c0c928c..8cb3e45439cf 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -302,8 +302,8 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, /* * Wrapper for pci_unmap_single working with scatterlists. */ -static void -gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *s; int i; @@ -333,7 +333,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) - gart_unmap_sg(dev, sg, i, dir); + gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; sg[0].dma_length = 0; break; @@ -404,8 +404,8 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, * DMA map all entries in a scatterlist. * Merge chunks that have page aligned sizes into a continuous mapping. */ -static int -gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *s, *ps, *start_sg, *sgmap; int need = 0, nextneed, i, out, start; @@ -472,7 +472,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) error: flush_gart(); - gart_unmap_sg(dev, sg, out, dir); + gart_unmap_sg(dev, sg, out, dir, NULL); /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { @@ -711,7 +711,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -static struct dma_mapping_ops gart_dma_ops = { +static struct dma_map_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index d42b69c90b40..fe50214db876 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -54,7 +54,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, * the same here. */ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct scatterlist *s; int i; @@ -78,7 +79,7 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } -struct dma_mapping_ops nommu_dma_ops = { +struct dma_map_ops nommu_dma_ops = { .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_sg = nommu_map_sg, diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3ae354c0fdef..3f0d9924dd1c 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -67,7 +67,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } -struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, @@ -77,8 +77,8 @@ struct dma_mapping_ops swiotlb_dma_ops = { .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg, - .unmap_sg = swiotlb_unmap_sg, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .dma_supported = NULL, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index da273e4ef66c..b9a562933903 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2441,7 +2441,8 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, - int nelems, int dir) + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -2499,7 +2500,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, } int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, - int dir) + enum dma_data_direction dir, struct dma_attrs *attrs) { void *addr; int i; @@ -2579,15 +2580,13 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, return nelems; } -static struct dma_mapping_ops intel_dma_ops = { +struct dma_map_ops intel_dma_ops = { .alloc_coherent = intel_alloc_coherent, .free_coherent = intel_free_coherent, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, -#ifdef CONFIG_X86_64 .map_page = intel_map_page, .unmap_page = intel_unmap_page, -#endif }; static inline int iommu_domain_cache_init(void) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c4f6c101dbcd..a254db1decd0 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -334,7 +334,9 @@ extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int); -extern int intel_map_sg(struct device *, struct scatterlist *, int, int); -extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int); +extern int intel_map_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *); +extern void intel_unmap_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *); #endif diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index dedd3c0cfe30..0567c3d8633b 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -66,36 +66,38 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, extern int swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir, struct dma_attrs *attrs); + enum dma_data_direction dir, struct dma_attrs *attrs); extern void swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, struct dma_attrs *attrs); + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs); extern void swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); + size_t size, enum dma_data_direction dir); extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir); + int nelems, enum dma_data_direction dir); extern void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); + size_t size, enum dma_data_direction dir); extern void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir); + int nelems, enum dma_data_direction dir); extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir); + unsigned long offset, size_t size, + enum dma_data_direction dir); extern void swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, unsigned long offset, size_t size, - int dir); + enum dma_data_direction dir); extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 48deef7e1976..d047de990a3f 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -736,7 +736,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, void swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); } @@ -744,7 +744,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); } @@ -769,7 +769,8 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, void swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir) + unsigned long offset, size_t size, + enum dma_data_direction dir) { swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, SYNC_FOR_CPU); @@ -778,7 +779,8 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); void swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, int dir) + unsigned long offset, size_t size, + enum dma_data_direction dir) { swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, SYNC_FOR_DEVICE); @@ -803,7 +805,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); */ int swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir, struct dma_attrs *attrs) + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -850,7 +852,7 @@ EXPORT_SYMBOL(swiotlb_map_sg); */ void swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, struct dma_attrs *attrs) + int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -902,7 +904,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, void swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) + int nelems, enum dma_data_direction dir) { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); } @@ -910,7 +912,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) + int nelems, enum dma_data_direction dir) { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } -- cgit v1.2.3-59-g8ed1b From f98eee8ea99fe74ee9c4e867ba178ec3072793be Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 5 Jan 2009 23:59:03 +0900 Subject: x86, ia64: remove duplicated swiotlb code This adds swiotlb_map_page and swiotlb_unmap_page to lib/swiotlb.c and remove IA64 and X86's swiotlb_map_page and swiotlb_unmap_page. This also removes unnecessary swiotlb_map_single, swiotlb_map_single_attrs, swiotlb_unmap_single and swiotlb_unmap_single_attrs. Signed-off-by: FUJITA Tomonori Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- arch/ia64/kernel/pci-swiotlb.c | 16 -------------- arch/x86/kernel/pci-swiotlb_64.c | 17 -------------- include/linux/swiotlb.h | 21 ++++++------------ lib/swiotlb.c | 48 +++++++++++++++------------------------- 4 files changed, 25 insertions(+), 77 deletions(-) (limited to 'lib') diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 6bf8f66786bd..e6b2ec9b27da 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -16,22 +16,6 @@ EXPORT_SYMBOL(swiotlb); /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly; -static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - return swiotlb_map_single_attrs(dev, page_address(page) + offset, size, - dir, attrs); -} - -static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - swiotlb_unmap_single_attrs(dev, dma_handle, size, dir, attrs); -} - struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3f0d9924dd1c..5e32c4f6a7ba 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -38,23 +38,6 @@ int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) return 0; } -/* these will be moved to lib/swiotlb.c later on */ - -static dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - return swiotlb_map_single(dev, page_address(page) + offset, size, dir); -} - -static void swiotlb_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - swiotlb_unmap_single(dev, dma_handle, size, dir); -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 0567c3d8633b..493dc17e7c87 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -41,20 +41,13 @@ extern void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); -extern dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir); - -extern void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); - -extern dma_addr_t -swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, - int dir, struct dma_attrs *attrs); - -extern void -swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, struct dma_attrs *attrs); +extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs); +extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs); extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d047de990a3f..ec7922bd0d61 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -636,11 +636,14 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) * Once the device is given the dma address, the device owns this memory until * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. */ -dma_addr_t -swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, - int dir, struct dma_attrs *attrs) -{ - dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, ptr); +dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + void *ptr = page_address(page) + offset; + dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); void *map; BUG_ON(dir == DMA_NONE); @@ -649,37 +652,30 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(hwdev, dev_addr, size) && + if (!address_needs_mapping(dev, dev_addr, size) && !range_needs_mapping(ptr, size)) return dev_addr; /* * Oh well, have to allocate and map a bounce buffer. */ - map = map_single(hwdev, virt_to_phys(ptr), size, dir); + map = map_single(dev, phys, size, dir); if (!map) { - swiotlb_full(hwdev, size, dir, 1); + swiotlb_full(dev, size, dir, 1); map = io_tlb_overflow_buffer; } - dev_addr = swiotlb_virt_to_bus(hwdev, map); + dev_addr = swiotlb_virt_to_bus(dev, map); /* * Ensure that the address returned is DMA'ble */ - if (address_needs_mapping(hwdev, dev_addr, size)) + if (address_needs_mapping(dev, dev_addr, size)) panic("map_single: bounce buffer is not DMA'ble"); return dev_addr; } -EXPORT_SYMBOL(swiotlb_map_single_attrs); - -dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) -{ - return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL); -} -EXPORT_SYMBOL(swiotlb_map_single); +EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must @@ -689,9 +685,9 @@ EXPORT_SYMBOL(swiotlb_map_single); * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ -void -swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, struct dma_attrs *attrs) +void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { char *dma_addr = swiotlb_bus_to_virt(dev_addr); @@ -701,15 +697,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, else if (dir == DMA_FROM_DEVICE) dma_mark_clean(dma_addr, size); } -EXPORT_SYMBOL(swiotlb_unmap_single_attrs); - -void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, - int dir) -{ - return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL); -} -EXPORT_SYMBOL(swiotlb_unmap_single); +EXPORT_SYMBOL_GPL(swiotlb_unmap_page); /* * Make physical memory consistent for a single streaming mode DMA translation -- cgit v1.2.3-59-g8ed1b From 7856a16ea03ed9b17860d756ee6473c2e57882b2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 7 Jan 2009 00:01:43 -0800 Subject: bzip2/lzma: DECOMPRESS_GZIP should select ZLIB_INFLATE Impact: Partial resolution of build failure DECOMPRESS_GZIP is just a common-interface wrapper around the zlib_inflate code; it thus need to select it. Signed-off-by: H. Peter Anvin --- lib/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index e37f061fd32a..daa481824d9c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -102,6 +102,7 @@ config LZO_DECOMPRESS # ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) # config DECOMPRESS_GZIP + select ZLIB_INFLATE tristate config DECOMPRESS_BZIP2 -- cgit v1.2.3-59-g8ed1b From 5619448fc55650cb76cbb825e16e4bb5d1390399 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Jan 2009 15:09:12 -0800 Subject: bzip2/lzma: fix constant in decompress_inflate Impact: Cleanup Fix constant 0x8100 /* 32K */; according to Alain the value 0x8100 was left over test code to test misalignment, the correct value is indeed 0x8000 == 32K. Signed-off-by: H. Peter Anvin --- lib/decompress_inflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 163e66aea5f6..d2e7c758d1ca 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -41,7 +41,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, set_error_fn(error_fn); rc = -1; if (flush) { - out_len = 0x8100; /* 32 K */ + out_len = 0x8000; /* 32 K */ out_buf = malloc(out_len); } else { out_len = 0x7fffffff; /* no limit */ -- cgit v1.2.3-59-g8ed1b From 6c11b12ac6f101732d43b5682f5b3ae4dda4205f Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Thu, 8 Jan 2009 15:10:19 -0800 Subject: bzip2/lzma: fix decompress_inflate.c vs multi-block-with-embedded-filename Impact: Bug fix Fix gunzip uncompression, so that it also works with files with embedded filenames that are larger than one block. Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- lib/decompress_inflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d2e7c758d1ca..839a329b4fc4 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -97,7 +97,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, strm->next_in++; strm->next_in++; } - strm->avail_in = len - 10; + strm->avail_in = len - (strm->next_in - zbuf); strm->next_out = out_buf; strm->avail_out = out_len; -- cgit v1.2.3-59-g8ed1b From 889c92d21db40be0b7d22a59395060237895bb85 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Jan 2009 15:14:17 -0800 Subject: bzip2/lzma: centralize format detection Centralize the compression format detection to a common routine in the lib directory, and use it for both initramfs and initrd. Signed-off-by: H. Peter Anvin --- include/linux/decompress/generic.h | 3 +++ init/do_mounts_rd.c | 38 ++++++----------------------- init/initramfs.c | 39 ++++++----------------------- lib/Makefile | 9 ++++--- lib/decompress.c | 50 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 lib/decompress.c (limited to 'lib') diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index f847f514f78e..6dfb856327bb 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -26,5 +26,8 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len, *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE */ +/* Utility routine to detect the decompression method */ +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name); #endif diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 9c9d7dbcf9ca..a06ed4f92e0e 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -12,9 +12,6 @@ #include -#include -#include -#include int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ @@ -49,24 +46,6 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); * cramfs * gzip */ -static const struct compress_format { - unsigned char magic[2]; - const char *name; - decompress_fn decompressor; -} compressed_formats[] = { -#ifdef CONFIG_RD_GZIP - { {037, 0213}, "gzip", gunzip }, - { {037, 0236}, "gzip", gunzip }, -#endif -#ifdef CONFIG_RD_BZIP2 - { {0x42, 0x5a}, "bzip2", bunzip2 }, -#endif -#ifdef CONFIG_RD_LZMA - { {0x5d, 0x00}, "lzma", unlzma }, -#endif - { {0, 0}, NULL, NULL } -}; - static int __init identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) { @@ -77,7 +56,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) struct cramfs_super *cramfsb; int nblocks = -1; unsigned char *buf; - const struct compress_format *cf; + const char *compress_name; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -95,15 +74,12 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) sys_lseek(fd, start_block * BLOCK_SIZE, 0); sys_read(fd, buf, size); - for (cf = compressed_formats; cf->decompressor; cf++) { - if (buf[0] == cf->magic[0] && buf[1] == cf->magic[1]) { - printk(KERN_NOTICE - "RAMDISK: %s image found at block %d\n", - cf->name, start_block); - *decompressor = cf->decompressor; - nblocks = 0; - goto done; - } + *decompressor = decompress_method(buf, size, &compress_name); + if (*decompressor) { + printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", + compress_name, start_block); + nblocks = 0; + goto done; } /* romfs is at block zero too */ diff --git a/init/initramfs.c b/init/initramfs.c index a3ba91cdab89..2f42984e5582 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -416,13 +416,13 @@ static int __init flush_buffer(void *bufv, unsigned len) static unsigned my_inptr; /* index of next byte to be processed in inbuf */ -#include -#include -#include +#include static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { int written; + decompress_fn decompress; + dry_run = check_only; header_buf = kmalloc(110, GFP_KERNEL); symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); @@ -450,35 +450,10 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; -#ifdef CONFIG_RD_GZIP - if (!gunzip(buf, len, NULL, flush_buffer, NULL, - &my_inptr, error) && - message == NULL) - goto ok; -#endif - -#ifdef CONFIG_RD_BZIP2 - message = NULL; /* Zero out message, or else cpio will - think an error has already occured */ - if (!bunzip2(buf, len, NULL, flush_buffer, NULL, - &my_inptr, error) && - message == NULL) { - goto ok; - } -#endif - -#ifdef CONFIG_RD_LZMA - message = NULL; /* Zero out message, or else cpio will - think an error has already occured */ - if (!unlzma(buf, len, NULL, flush_buffer, NULL, - &my_inptr, error) && - message == NULL) { - goto ok; - } -#endif -#if defined CONFIG_RD_GZIP || defined CONFIG_RD_BZIP2 || defined CONFIG_RD_LZMA -ok: -#endif + decompress = decompress_method(buf, len, NULL); + if (decompress) + decompress(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error); if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; diff --git a/lib/Makefile b/lib/Makefile index d9ac5a414fa7..790de7c25d0d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o + proportions.o prio_heap.o ratelimit.o show_mem.o \ + is_single_threaded.o decompress.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -65,9 +66,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ -obj-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o -obj-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o -obj-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o +lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o +lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o +lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/decompress.c b/lib/decompress.c new file mode 100644 index 000000000000..edac55cc7823 --- /dev/null +++ b/lib/decompress.c @@ -0,0 +1,50 @@ +/* + * decompress.c + * + * Detect the decompression method based on magic number + */ + +#include + +#include +#include +#include + +#include +#include + +static const struct compress_format { + unsigned char magic[2]; + const char *name; + decompress_fn decompressor; +} compressed_formats[] = { +#ifdef CONFIG_DECOMPRESS_GZIP + { {037, 0213}, "gzip", gunzip }, + { {037, 0236}, "gzip", gunzip }, +#endif +#ifdef CONFIG_DECOMPRESS_BZIP2 + { {0x42, 0x5a}, "bzip2", bunzip2 }, +#endif +#ifdef CONFIG_DECOMPRESS_LZMA + { {0x5d, 0x00}, "lzma", unlzma }, +#endif + { {0, 0}, NULL, NULL } +}; + +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name) +{ + const struct compress_format *cf; + + if (len < 2) + return NULL; /* Need at least this much... */ + + for (cf = compressed_formats; cf->decompressor; cf++) { + if (!memcmp(inbuf, cf->magic, 2)) + break; + + } + if (name) + *name = cf->name; + return cf->decompressor; +} -- cgit v1.2.3-59-g8ed1b From 0b8698ab5847cbe25775083659f00c658a8161c9 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 9 Jan 2009 18:32:09 +0000 Subject: swiotlb: range_needs_mapping should take a physical address. The swiotlb_arch_range_needs_mapping() hook should take a physical address rather than a virtual address in order to support highmem pages. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb_64.c | 2 +- include/linux/swiotlb.h | 2 +- lib/swiotlb.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 5e32c4f6a7ba..34f12e9996ed 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -33,7 +33,7 @@ phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) return baddr; } -int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) +int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) { return 0; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 493dc17e7c87..ac9ff54f7cb3 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -31,7 +31,7 @@ extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); -extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); +extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 30fe65ede2bb..31bae40830ca 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -145,7 +145,7 @@ static void *swiotlb_bus_to_virt(dma_addr_t address) return phys_to_virt(swiotlb_bus_to_phys(address)); } -int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) +int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) { return 0; } @@ -315,9 +315,9 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); } -static inline int range_needs_mapping(void *ptr, size_t size) +static inline int range_needs_mapping(phys_addr_t paddr, size_t size) { - return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size); + return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); } static int is_swiotlb_buffer(char *addr) @@ -653,7 +653,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(ptr, size)) + !range_needs_mapping(virt_to_phys(ptr), size)) return dev_addr; /* @@ -804,7 +804,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, void *addr = sg_virt(sg); dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, addr); - if (range_needs_mapping(addr, sg->length) || + if (range_needs_mapping(sg_phys(sg), sg->length) || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); -- cgit v1.2.3-59-g8ed1b From 961d7d0ee5150e0197cc81c2a8884ecb230276e2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 9 Jan 2009 18:32:10 +0000 Subject: swiotlb: do not use sg_virt() Scatterlists containing HighMem pages do not have a useful virtual address. Use the physical address instead. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 31bae40830ca..32e2bd3b1142 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -801,10 +801,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) { - void *addr = sg_virt(sg); - dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, addr); + phys_addr_t paddr = sg_phys(sg); + dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); - if (range_needs_mapping(sg_phys(sg), sg->length) || + if (range_needs_mapping(paddr, sg->length) || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); @@ -848,11 +848,11 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg))) + if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), sg->dma_length, dir); else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(sg_virt(sg), sg->dma_length); + dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); } } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); @@ -882,11 +882,11 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg))) + if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), sg->dma_length, dir, target); else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(sg_virt(sg), sg->dma_length); + dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); } } -- cgit v1.2.3-59-g8ed1b From 23a22d57a8962479ca630c9542e62d6f86fdf927 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Jan 2009 14:24:04 -0800 Subject: bzip2/lzma: comprehensible error messages for missing decompressor Instead of failing to identify a compressed image with a decompressor that we don't have compiled in, identify it and fail with a comprehensible panic message. Signed-off-by: H. Peter Anvin --- init/do_mounts_rd.c | 5 ++++- init/initramfs.c | 12 +++++++++++- lib/decompress.c | 16 ++++++++++------ 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a015e267fd17..91d0cfca5071 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -79,9 +79,12 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) sys_read(fd, buf, size); *decompressor = decompress_method(buf, size, &compress_name); - if (*decompressor) { + if (compress_name) { printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", compress_name, start_block); + if (!*decompressor) + printk(KERN_CRIT "RAMDISK: %s decompressor not configured!\n", + compress_name); nblocks = 0; goto done; } diff --git a/init/initramfs.c b/init/initramfs.c index 76f4a0125338..9a7290ec8187 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -421,6 +421,8 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { int written; decompress_fn decompress; + const char *compress_name; + static __initdata char msg_buf[64]; dry_run = check_only; header_buf = kmalloc(110, GFP_KERNEL); @@ -449,10 +451,18 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; - decompress = decompress_method(buf, len, NULL); + decompress = decompress_method(buf, len, &compress_name); if (decompress) decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); + else if (compress_name) { + if (!message) { + snprintf(msg_buf, sizeof msg_buf, + "compression method %s not configured", + compress_name); + message = msg_buf; + } + } if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; diff --git a/lib/decompress.c b/lib/decompress.c index edac55cc7823..961f367320fc 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -13,21 +13,25 @@ #include #include +#ifndef CONFIG_DECOMPRESS_GZIP +# define gunzip NULL +#endif +#ifndef CONFIG_DECOMPRESS_BZIP2 +# define bunzip2 NULL +#endif +#ifndef CONFIG_DECOMPRESS_LZMA +# define unlzma NULL +#endif + static const struct compress_format { unsigned char magic[2]; const char *name; decompress_fn decompressor; } compressed_formats[] = { -#ifdef CONFIG_DECOMPRESS_GZIP { {037, 0213}, "gzip", gunzip }, { {037, 0236}, "gzip", gunzip }, -#endif -#ifdef CONFIG_DECOMPRESS_BZIP2 { {0x42, 0x5a}, "bzip2", bunzip2 }, -#endif -#ifdef CONFIG_DECOMPRESS_LZMA { {0x5d, 0x00}, "lzma", unlzma }, -#endif { {0, 0}, NULL, NULL } }; -- cgit v1.2.3-59-g8ed1b From ceacc2c1c85ac498ca4cf297bdfe5b4aaa9fd0e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Jan 2009 14:46:40 +0100 Subject: sched: make plist a library facility Ingo Molnar wrote: > here's a new build failure with tip/sched/rt: > > LD .tmp_vmlinux1 > kernel/built-in.o: In function `set_curr_task_rt': > sched.c:(.text+0x3675): undefined reference to `plist_del' > kernel/built-in.o: In function `pick_next_task_rt': > sched.c:(.text+0x37ce): undefined reference to `plist_del' > kernel/built-in.o: In function `enqueue_pushable_task': > sched.c:(.text+0x381c): undefined reference to `plist_del' Eliminate the plist library kconfig and make it available unconditionally. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- init/Kconfig | 1 - kernel/sched_rt.c | 21 +++++++++++++++------ lib/Kconfig | 6 ------ lib/Makefile | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/init/Kconfig b/init/Kconfig index a724a149bf3f..19b78aa010e3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -874,7 +874,6 @@ config SLABINFO config RT_MUTEXES boolean - select PLIST config BASE_SMALL int diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 4230b15fe90e..48d1f6e8497a 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -114,14 +114,23 @@ static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) #else +static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p) +{ +} + +static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p) +{ +} + static inline -void enqueue_pushable_task(struct rq *rq, struct task_struct *p) {} -static inline -void dequeue_pushable_task(struct rq *rq, struct task_struct *p) {} -static inline -void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} +void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) +{ +} + static inline -void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} +void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) +{ +} #endif /* CONFIG_SMP */ diff --git a/lib/Kconfig b/lib/Kconfig index 03c2c24b9083..fc8ea1ca59d8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -136,12 +136,6 @@ config TEXTSEARCH_BM config TEXTSEARCH_FSM tristate -# -# plist support is select#ed if needed -# -config PLIST - boolean - config HAS_IOMEM boolean depends on !NO_IOMEM diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded27..902d73851044 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o + proportions.o prio_heap.o ratelimit.o show_mem.o \ + is_single_threaded.o plist.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -40,7 +41,6 @@ lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o -obj-$(CONFIG_PLIST) += plist.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o -- cgit v1.2.3-59-g8ed1b From ff491a7334acfd74e515c896632e37e401f52676 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Feb 2009 23:56:36 -0800 Subject: netlink: change return-value logic of netlink_broadcast() Currently, netlink_broadcast() reports errors to the caller if no messages at all were delivered: 1) If, at least, one message has been delivered correctly, returns 0. 2) Otherwise, if no messages at all were delivered due to skb_clone() failure, return -ENOBUFS. 3) Otherwise, if there are no listeners, return -ESRCH. With this patch, the caller knows if the delivery of any of the messages to the listeners have failed: 1) If it fails to deliver any message (for whatever reason), return -ENOBUFS. 2) Otherwise, if all messages were delivered OK, returns 0. 3) Otherwise, if no listeners, return -ESRCH. In the current ctnetlink code and in Netfilter in general, we can add reliable logging and connection tracking event delivery by dropping the packets whose events were not successfully delivered over Netlink. Of course, this option would be settable via /proc as this approach reduces performance (in terms of filtered connections per seconds by a stateful firewall) but providing reliable logging and event delivery (for conntrackd) in return. This patch also changes some clients of netlink_broadcast() that may report ENOBUFS errors via printk. This error handling is not of any help. Instead, the userspace daemons that are listening to those netlink messages should resync themselves with the kernel-side if they hit ENOBUFS. BTW, netlink_broadcast() clients include those that call cn_netlink_send(), nlmsg_multicast() and genlmsg_multicast() since they internally call netlink_broadcast() and return its error value. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/acpi/event.c | 6 +----- drivers/scsi/scsi_transport_fc.c | 16 ++++------------ drivers/scsi/scsi_transport_iscsi.c | 12 ++---------- drivers/video/uvesafb.c | 5 ++++- fs/dquot.c | 5 +---- lib/kobject_uevent.c | 3 +++ net/netlink/af_netlink.c | 8 ++++++-- net/wimax/op-msg.c | 9 +++------ net/wimax/stack.c | 12 ++++-------- 9 files changed, 28 insertions(+), 48 deletions(-) (limited to 'lib') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 0c24bd4d6562..aeb7e5fb4a04 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -235,11 +235,7 @@ int acpi_bus_generate_netlink_event(const char *device_class, return result; } - result = - genlmsg_multicast(skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC); - if (result) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Failed to send a Genetlink message!\n")); + genlmsg_multicast(skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC); return 0; } diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 5f77417ed585..3ee4eb40abcf 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -533,12 +533,8 @@ fc_host_post_event(struct Scsi_Host *shost, u32 event_number, event->event_code = event_code; event->event_data = event_data; - err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, - GFP_KERNEL); - if (err && (err != -ESRCH)) /* filter no recipient errors */ - /* nlmsg_multicast already kfree_skb'd */ - goto send_fail; - + nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, + GFP_KERNEL); return; send_fail_skb: @@ -607,12 +603,8 @@ fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, event->event_code = FCH_EVT_VENDOR_UNIQUE; memcpy(&event->event_data, data_buf, data_len); - err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, - GFP_KERNEL); - if (err && (err != -ESRCH)) /* filter no recipient errors */ - /* nlmsg_multicast already kfree_skb'd */ - goto send_vendor_fail; - + nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, + GFP_KERNEL); return; send_vendor_fail_skb: diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 75c9297694cb..2adfab8c11c1 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -966,15 +966,7 @@ iscsi_if_transport_lookup(struct iscsi_transport *tt) static int iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp) { - int rc; - - rc = netlink_broadcast(nls, skb, 0, 1, gfp); - if (rc < 0) { - printk(KERN_ERR "iscsi: can not broadcast skb (%d)\n", rc); - return rc; - } - - return 0; + return netlink_broadcast(nls, skb, 0, 1, gfp); } static int @@ -1207,7 +1199,7 @@ int iscsi_session_event(struct iscsi_cls_session *session, * the user and when the daemon is restarted it will handle it */ rc = iscsi_broadcast_skb(skb, GFP_KERNEL); - if (rc < 0) + if (rc == -ESRCH) iscsi_cls_session_printk(KERN_ERR, session, "Cannot notify userspace of session " "event %u. Check iscsi daemon\n", diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 6c2d37fdd3b9..74ae75899009 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -204,8 +204,11 @@ static int uvesafb_exec(struct uvesafb_ktask *task) } else { v86d_started = 1; err = cn_netlink_send(m, 0, gfp_any()); + if (err == -ENOBUFS) + err = 0; } - } + } else if (err == -ENOBUFS) + err = 0; if (!err && !(task->t.flags & TF_EXIT)) err = !wait_for_completion_timeout(task->done, diff --git a/fs/dquot.c b/fs/dquot.c index bca3cac4bee7..d6add0bf5ad3 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1057,10 +1057,7 @@ static void send_warning(const struct dquot *dquot, const char warntype) goto attr_err_out; genlmsg_end(skb, msg_head); - ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - if (ret < 0 && ret != -ESRCH) - printk(KERN_ERR - "VFS: Failed to send notification message: %d\n", ret); + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 318328ddbd1c..38131028d16f 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -227,6 +227,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, NETLINK_CB(skb).dst_group = 1; retval = netlink_broadcast(uevent_sock, skb, 0, 1, GFP_KERNEL); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS) + retval = 0; } else retval = -ENOMEM; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9eb895c7a2a9..6ee69c27f806 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -950,6 +950,7 @@ struct netlink_broadcast_data { u32 pid; u32 group; int failure; + int delivery_failure; int congested; int delivered; gfp_t allocation; @@ -999,6 +1000,7 @@ static inline int do_one_broadcast(struct sock *sk, p->skb2 = NULL; } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { netlink_overrun(sk); + p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; @@ -1025,6 +1027,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, info.pid = pid; info.group = group; info.failure = 0; + info.delivery_failure = 0; info.congested = 0; info.delivered = 0; info.allocation = allocation; @@ -1045,13 +1048,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, if (info.skb2) kfree_skb(info.skb2); + if (info.delivery_failure || info.failure) + return -ENOBUFS; + if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) yield(); return 0; } - if (info.failure) - return -ENOBUFS; return -ESRCH; } EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index cb3b4ad53683..5d149c1b5f0d 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -258,7 +258,6 @@ EXPORT_SYMBOL_GPL(wimax_msg_len); */ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) { - int result; struct device *dev = wimax_dev->net_dev->dev.parent; void *msg = skb->data; size_t size = skb->len; @@ -266,11 +265,9 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size); d_dump(2, dev, msg, size); - result = genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); - d_printf(1, dev, "CTX: genl multicast result %d\n", result); - if (result == -ESRCH) /* Nobody connected, ignore it */ - result = 0; /* btw, the skb is freed already */ - return result; + genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); + d_printf(1, dev, "CTX: genl multicast done\n"); + return 0; } EXPORT_SYMBOL_GPL(wimax_msg_send); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3869c0327882..a0ee76b52510 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -163,16 +163,12 @@ int wimax_gnl_re_state_change_send( struct device *dev = wimax_dev_to_dev(wimax_dev); d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n", wimax_dev, report_skb); - if (report_skb == NULL) + if (report_skb == NULL) { + result = -ENOMEM; goto out; - genlmsg_end(report_skb, header); - result = genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); - if (result == -ESRCH) /* Nobody connected, ignore it */ - result = 0; /* btw, the skb is freed already */ - if (result < 0) { - dev_err(dev, "RE_STCH: Error sending: %d\n", result); - nlmsg_free(report_skb); } + genlmsg_end(report_skb, header); + genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); out: d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n", wimax_dev, report_skb, result); -- cgit v1.2.3-59-g8ed1b From c37682d907a615c9a8751748b58e9ba47d415429 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 14 Jan 2009 20:46:02 +0000 Subject: lmb: Rework lmb_dump_all() output The lmb_dump_all() output didn't include the RMO size, which is interesting on powerpc. The output was also a bit spacey and not well aligned, and didn't show you the end addresses. Signed-off-by: Michael Ellerman Acked-by: David S. Miller Signed-off-by: Benjamin Herrenschmidt --- lib/lmb.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/lmb.c b/lib/lmb.c index 97e547037084..e4a6482d8b26 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -29,33 +29,33 @@ static int __init early_lmb(char *p) } early_param("lmb", early_lmb); -void lmb_dump_all(void) +static void lmb_dump(struct lmb_region *region, char *name) { - unsigned long i; + unsigned long long base, size; + int i; + + pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); + + for (i = 0; i < region->cnt; i++) { + base = region->region[i].base; + size = region->region[i].size; + + pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", + name, i, base, base + size - 1, size); + } +} +void lmb_dump_all(void) +{ if (!lmb_debug) return; - pr_info("lmb_dump_all:\n"); - pr_info(" memory.cnt = 0x%lx\n", lmb.memory.cnt); - pr_info(" memory.size = 0x%llx\n", - (unsigned long long)lmb.memory.size); - for (i=0; i < lmb.memory.cnt ;i++) { - pr_info(" memory.region[0x%lx].base = 0x%llx\n", - i, (unsigned long long)lmb.memory.region[i].base); - pr_info(" .size = 0x%llx\n", - (unsigned long long)lmb.memory.region[i].size); - } + pr_info("LMB configuration:\n"); + pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size); + pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size); - pr_info(" reserved.cnt = 0x%lx\n", lmb.reserved.cnt); - pr_info(" reserved.size = 0x%llx\n", - (unsigned long long)lmb.memory.size); - for (i=0; i < lmb.reserved.cnt ;i++) { - pr_info(" reserved.region[0x%lx].base = 0x%llx\n", - i, (unsigned long long)lmb.reserved.region[i].base); - pr_info(" .size = 0x%llx\n", - (unsigned long long)lmb.reserved.region[i].size); - } + lmb_dump(&lmb.memory, "memory"); + lmb_dump(&lmb.reserved, "reserved"); } static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, -- cgit v1.2.3-59-g8ed1b From adf8b37bafc1495393201a2ae4235846371870d0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 12 Feb 2009 13:56:34 +0100 Subject: [ARM] 5386/2: unwind: Add Makefile and Kconfig entries for ARM stack unwinding This patch also makes the frame pointer default to y only if !ARM_UNWIND. LOCKDEP no longer selects FRAME_POINTER if ARM_UNWIND is enabled. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 21 ++++++++++++++++----- arch/arm/Makefile | 4 ++++ arch/arm/kernel/Makefile | 1 + lib/Kconfig.debug | 2 +- 4 files changed, 22 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 192ee01a9ba2..35f8db35f442 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -2,18 +2,29 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -# RMK wants arm kernels compiled with frame pointers so hardwire this to y. +# RMK wants arm kernels compiled with frame pointers or stack unwinding. # If you know what you are doing and are willing to live without stack # traces, you can get a slightly smaller kernel by setting this option to # n, but then RMK will have to kill you ;). config FRAME_POINTER bool - default y + default y if !ARM_UNWIND help If you say N here, the resulting kernel will be slightly smaller and - faster. However, when a problem occurs with the kernel, the - information that is reported is severely limited. Most people - should say Y here. + faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, + when a problem occurs with the kernel, the information that is + reported is severely limited. + +config ARM_UNWIND + bool "Enable stack unwinding support" + depends on AEABI && EXPERIMENTAL + default y + help + This option enables stack unwinding support in the kernel + using the information automatically generated by the + compiler. The resulting kernel image is slightly bigger but + the performance is not affected. Currently, this feature + only works with EABI compilers. If unsure say Y. config DEBUG_USER bool "Verbose user fault messages" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 24e0f0187697..e7ef876e574b 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -85,6 +85,10 @@ else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif +ifeq ($(CONFIG_ARM_UNWIND),y) +CFLAGS_ABI +=-funwind-tables +endif + # Need -Uarm for gcc < 3.x KBUILD_CFLAGS +=$(CFLAGS_ABI) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm KBUILD_AFLAGS +=$(CFLAGS_ABI) $(arch-y) $(tune-y) -msoft-float diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 4305345987d3..ca60d335e8fa 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_ATAGS_PROC) += atags.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312 diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 29044f500269..08275a5aa00e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -402,7 +402,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !X86 && !MIPS && !PPC + select FRAME_POINTER if !X86 && !MIPS && !PPC && !ARM_UNWIND select KALLSYMS select KALLSYMS_ALL -- cgit v1.2.3-59-g8ed1b From e4aa7ca5a2e6d44f07ceb87d9448113f5b48a334 Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Thu, 19 Feb 2009 13:36:55 -0800 Subject: bzip2/lzma: don't stop search at first unconfigured compression Impact: Bugfix, avoids kernels which build but panic on boot Fix a bug in decompress.c : only scanned until the first non-configured compressor (with disastrous result especially if that was gzip.) Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- lib/decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/decompress.c b/lib/decompress.c index 961f367320fc..d2842f571674 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -43,7 +43,7 @@ decompress_fn decompress_method(const unsigned char *inbuf, int len, if (len < 2) return NULL; /* Need at least this much... */ - for (cf = compressed_formats; cf->decompressor; cf++) { + for (cf = compressed_formats; cf->name; cf++) { if (!memcmp(inbuf, cf->magic, 2)) break; -- cgit v1.2.3-59-g8ed1b From 152de30bced150617e5731a9fe2364c9d04fe26c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 20 Feb 2009 15:38:47 -0800 Subject: docsrc: use config instead of menuconfig BUILD_DOCSRC should be controlled by "config" instead of "menuconfig". I have no idea how I managed to use "menuconfig" here. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 29044f500269..1bcf9cd4baa0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -838,7 +838,7 @@ config FIREWIRE_OHCI_REMOTE_DMA If unsure, say N. -menuconfig BUILD_DOCSRC +config BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK help -- cgit v1.2.3-59-g8ed1b From 3e8ebb5c433f016dff5824587436642d87fc2e6c Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 1 Mar 2009 20:41:41 -0500 Subject: debug_objects: add boot-parameter toggle to turn object debugging off again While trying to debug why my Atom netbook is falling over booting rawhide debug-enabled kernels, I stumbled across the fact that we've been enabling object debugging by default. However, once you default it to on, you've got no way to turn it back off again at runtime. Add a boolean toggle to turn it off. I would just make it an int module_param, however people may already expect the boolean enable behaviour, so just add an analogue for disabling. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 3 +++ lib/debugobjects.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'lib') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 54f21a5c262b..1a328f33e1df 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -604,6 +604,9 @@ and is between 256 and 4096 characters. It is defined in the file debug_objects [KNL] Enable object debugging + no_debug_objects + [KNL] Disable object debugging + debugpat [X86] Enable PAT debugging decnet.addr= [HW,NET] diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 5d99be1fd988..90e46fa12721 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -55,7 +55,15 @@ static int __init enable_object_debug(char *str) debug_objects_enabled = 1; return 0; } + +static int __init disable_object_debug(char *str) +{ + debug_objects_enabled = 0; + return 0; +} + early_param("debug_objects", enable_object_debug); +early_param("no_debug_objects", disable_object_debug); static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NONE] = "none", -- cgit v1.2.3-59-g8ed1b From e9cc8bddaea3944fabfebb968bc88d603239beed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 4 Mar 2009 14:53:30 +0800 Subject: netlink: Move netlink attribute parsing support to lib Netlink attribute parsing may be used even if CONFIG_NET is not set. Move it from net/netlink to lib and control its inclusion based on the new config symbol CONFIG_NLATTR, which is selected by CONFIG_NET. Signed-off-by: Geert Uytterhoeven Acked-by: David S. Miller Signed-off-by: Herbert Xu --- lib/Kconfig | 6 + lib/Makefile | 2 + lib/nlattr.c | 473 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/Kconfig | 1 + net/netlink/Makefile | 2 +- net/netlink/attr.c | 473 --------------------------------------------------- 6 files changed, 483 insertions(+), 474 deletions(-) create mode 100644 lib/nlattr.c delete mode 100644 net/netlink/attr.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 03c2c24b9083..cea9e30a88ff 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -174,4 +174,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS depends on EXPERIMENTAL && BROKEN +# +# Netlink attribute parsing support is select'ed if needed +# +config NLATTR + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded27..b2c09da02cae 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -84,6 +84,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_NLATTR) += nlattr.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/nlattr.c b/lib/nlattr.c new file mode 100644 index 000000000000..56c3ce7fe29a --- /dev/null +++ b/lib/nlattr.c @@ -0,0 +1,473 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf + * Alexey Kuznetsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { + [NLA_U8] = sizeof(u8), + [NLA_U16] = sizeof(u16), + [NLA_U32] = sizeof(u32), + [NLA_U64] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + +static int validate_nla(struct nlattr *nla, int maxtype, + const struct nla_policy *policy) +{ + const struct nla_policy *pt; + int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); + + if (type <= 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + BUG_ON(pt->type > NLA_TYPE_MAX); + + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; + + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; + + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + case NLA_BINARY: + if (pt->len && attrlen > pt->len) + return -ERANGE; + break; + + case NLA_NESTED_COMPAT: + if (attrlen < pt->len) + return -ERANGE; + if (attrlen < NLA_ALIGN(pt->len)) + break; + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) + return -ERANGE; + nla = nla_data(nla) + NLA_ALIGN(pt->len); + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) + return -ERANGE; + break; + case NLA_NESTED: + /* a nested attributes is allowed to be empty; if its not, + * it must have a size of at least NLA_HDRLEN. + */ + if (attrlen == 0) + break; + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } + + return 0; +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + nla_for_each_attr(nla, head, len, rem) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + err = 0; +errout: + return err; +} + +/** + * nla_parse - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessable via the attribute type. Attributes with a type + * exceeding maxtype will be silently ignored for backwards compatibility + * reasons. policy may be set to NULL if no validation is required. + * + * Returns 0 on success or a negative error code. + */ +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type > 0 && type <= maxtype) { + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + tb[type] = nla; + } + } + + if (unlikely(rem > 0)) + printk(KERN_WARNING "netlink: %d bytes leftover after parsing " + "attributes.\n", rem); + + err = 0; +errout: + return err; +} + +/** + * nla_find - Find a specific attribute in a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @attrtype: type of attribute to look for + * + * Returns the first attribute in the stream matching the specified type. + */ +struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +{ + struct nlattr *nla; + int rem; + + nla_for_each_attr(nla, head, len, rem) + if (nla_type(nla) == attrtype) + return nla; + + return NULL; +} + +/** + * nla_strlcpy - Copy string attribute payload into a sized buffer + * @dst: where to copy the string to + * @nla: attribute to copy the string from + * @dstsize: size of destination buffer + * + * Copies at most dstsize - 1 bytes into the destination buffer. + * The result is always a valid NUL-terminated string. Unlike + * strlcpy the destination buffer is always padded out. + * + * Returns the length of the source buffer. + */ +size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +{ + size_t srclen = nla_len(nla); + char *src = nla_data(nla); + + if (srclen > 0 && src[srclen - 1] == '\0') + srclen--; + + if (dstsize > 0) { + size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; + + memset(dst, 0, dstsize); + memcpy(dst, src, len); + } + + return srclen; +} + +/** + * nla_memcpy - Copy a netlink attribute into another memory area + * @dest: where to copy to memcpy + * @src: netlink attribute to copy from + * @count: size of the destination area + * + * Note: The number of bytes copied is limited by the length of + * attribute's payload. memcpy + * + * Returns the number of bytes copied. + */ +int nla_memcpy(void *dest, const struct nlattr *src, int count) +{ + int minlen = min_t(int, count, nla_len(src)); + + memcpy(dest, nla_data(src), minlen); + + return minlen; +} + +/** + * nla_memcmp - Compare an attribute with sized memory area + * @nla: netlink attribute + * @data: memory area + * @size: size of memory area + */ +int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size) +{ + int d = nla_len(nla) - size; + + if (d == 0) + d = memcmp(nla_data(nla), data, size); + + return d; +} + +/** + * nla_strcmp - Compare a string attribute against a string + * @nla: netlink string attribute + * @str: another string + */ +int nla_strcmp(const struct nlattr *nla, const char *str) +{ + int len = strlen(str) + 1; + int d = nla_len(nla) - len; + + if (d == 0) + d = memcmp(nla_data(nla), str, len); + + return d; +} + +/** + * __nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + struct nlattr *nla; + + nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); + nla->nla_type = attrtype; + nla->nla_len = nla_attr_size(attrlen); + + memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); + + return nla; +} + +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. + */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + +/** + * nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return NULL; + + return __nla_reserve(skb, attrtype, attrlen); +} + +/** + * nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + +/** + * __nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, + const void *data) +{ + struct nlattr *nla; + + nla = __nla_reserve(skb, attrtype, attrlen); + memcpy(nla_data(nla), data, attrlen); +} + +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} + +/** + * nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return -EMSGSIZE; + + __nla_put(skb, attrtype, attrlen, data); + return 0; +} + +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -EMSGSIZE; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} + +/** + * nla_append - Add a netlink attribute without header or padding + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_append(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -EMSGSIZE; + + memcpy(skb_put(skb, attrlen), data, attrlen); + return 0; +} + +EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(nla_parse); +EXPORT_SYMBOL(nla_find); +EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); +EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); +EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); +EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); +EXPORT_SYMBOL(nla_memcpy); +EXPORT_SYMBOL(nla_memcmp); +EXPORT_SYMBOL(nla_strcmp); +EXPORT_SYMBOL(nla_append); diff --git a/net/Kconfig b/net/Kconfig index cdb8fdef6c4a..eab40a481356 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -4,6 +4,7 @@ menuconfig NET bool "Networking support" + select NLATTR ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even diff --git a/net/netlink/Makefile b/net/netlink/Makefile index e3589c2de49e..bdd6ddf4e95b 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -2,4 +2,4 @@ # Makefile for the netlink driver. # -obj-y := af_netlink.o attr.o genetlink.o +obj-y := af_netlink.o genetlink.o diff --git a/net/netlink/attr.c b/net/netlink/attr.c deleted file mode 100644 index 56c3ce7fe29a..000000000000 --- a/net/netlink/attr.c +++ /dev/null @@ -1,473 +0,0 @@ -/* - * NETLINK Netlink attributes - * - * Authors: Thomas Graf - * Alexey Kuznetsov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { - [NLA_U8] = sizeof(u8), - [NLA_U16] = sizeof(u16), - [NLA_U32] = sizeof(u32), - [NLA_U64] = sizeof(u64), - [NLA_NESTED] = NLA_HDRLEN, -}; - -static int validate_nla(struct nlattr *nla, int maxtype, - const struct nla_policy *policy) -{ - const struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); - - if (type <= 0 || type > maxtype) - return 0; - - pt = &policy[type]; - - BUG_ON(pt->type > NLA_TYPE_MAX); - - switch (pt->type) { - case NLA_FLAG: - if (attrlen > 0) - return -ERANGE; - break; - - case NLA_NUL_STRING: - if (pt->len) - minlen = min_t(int, attrlen, pt->len + 1); - else - minlen = attrlen; - - if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) - return -EINVAL; - /* fall through */ - - case NLA_STRING: - if (attrlen < 1) - return -ERANGE; - - if (pt->len) { - char *buf = nla_data(nla); - - if (buf[attrlen - 1] == '\0') - attrlen--; - - if (attrlen > pt->len) - return -ERANGE; - } - break; - - case NLA_BINARY: - if (pt->len && attrlen > pt->len) - return -ERANGE; - break; - - case NLA_NESTED_COMPAT: - if (attrlen < pt->len) - return -ERANGE; - if (attrlen < NLA_ALIGN(pt->len)) - break; - if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) - return -ERANGE; - nla = nla_data(nla) + NLA_ALIGN(pt->len); - if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) - return -ERANGE; - break; - case NLA_NESTED: - /* a nested attributes is allowed to be empty; if its not, - * it must have a size of at least NLA_HDRLEN. - */ - if (attrlen == 0) - break; - default: - if (pt->len) - minlen = pt->len; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; - - if (attrlen < minlen) - return -ERANGE; - } - - return 0; -} - -/** - * nla_validate - Validate a stream of attributes - * @head: head of attribute stream - * @len: length of attribute stream - * @maxtype: maximum attribute type to be expected - * @policy: validation policy - * - * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. - * - * Returns 0 on success or a negative error code. - */ -int nla_validate(struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - nla_for_each_attr(nla, head, len, rem) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - err = 0; -errout: - return err; -} - -/** - * nla_parse - Parse a stream of attributes into a tb buffer - * @tb: destination array with maxtype+1 elements - * @maxtype: maximum attribute type to be expected - * @head: head of attribute stream - * @len: length of attribute stream - * @policy: validation policy - * - * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessable via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. - * - * Returns 0 on success or a negative error code. - */ -int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - const struct nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type > 0 && type <= maxtype) { - if (policy) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - tb[type] = nla; - } - } - - if (unlikely(rem > 0)) - printk(KERN_WARNING "netlink: %d bytes leftover after parsing " - "attributes.\n", rem); - - err = 0; -errout: - return err; -} - -/** - * nla_find - Find a specific attribute in a stream of attributes - * @head: head of attribute stream - * @len: length of attribute stream - * @attrtype: type of attribute to look for - * - * Returns the first attribute in the stream matching the specified type. - */ -struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) -{ - struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) - if (nla_type(nla) == attrtype) - return nla; - - return NULL; -} - -/** - * nla_strlcpy - Copy string attribute payload into a sized buffer - * @dst: where to copy the string to - * @nla: attribute to copy the string from - * @dstsize: size of destination buffer - * - * Copies at most dstsize - 1 bytes into the destination buffer. - * The result is always a valid NUL-terminated string. Unlike - * strlcpy the destination buffer is always padded out. - * - * Returns the length of the source buffer. - */ -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) -{ - size_t srclen = nla_len(nla); - char *src = nla_data(nla); - - if (srclen > 0 && src[srclen - 1] == '\0') - srclen--; - - if (dstsize > 0) { - size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; - - memset(dst, 0, dstsize); - memcpy(dst, src, len); - } - - return srclen; -} - -/** - * nla_memcpy - Copy a netlink attribute into another memory area - * @dest: where to copy to memcpy - * @src: netlink attribute to copy from - * @count: size of the destination area - * - * Note: The number of bytes copied is limited by the length of - * attribute's payload. memcpy - * - * Returns the number of bytes copied. - */ -int nla_memcpy(void *dest, const struct nlattr *src, int count) -{ - int minlen = min_t(int, count, nla_len(src)); - - memcpy(dest, nla_data(src), minlen); - - return minlen; -} - -/** - * nla_memcmp - Compare an attribute with sized memory area - * @nla: netlink attribute - * @data: memory area - * @size: size of memory area - */ -int nla_memcmp(const struct nlattr *nla, const void *data, - size_t size) -{ - int d = nla_len(nla) - size; - - if (d == 0) - d = memcmp(nla_data(nla), data, size); - - return d; -} - -/** - * nla_strcmp - Compare a string attribute against a string - * @nla: netlink string attribute - * @str: another string - */ -int nla_strcmp(const struct nlattr *nla, const char *str) -{ - int len = strlen(str) + 1; - int d = nla_len(nla) - len; - - if (d == 0) - d = memcmp(nla_data(nla), str, len); - - return d; -} - -/** - * __nla_reserve - reserve room for attribute on the skb - * @skb: socket buffer to reserve room on - * @attrtype: attribute type - * @attrlen: length of attribute payload - * - * Adds a netlink attribute header to a socket buffer and reserves - * room for the payload but does not copy it. - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute header and payload. - */ -struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) -{ - struct nlattr *nla; - - nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); - nla->nla_type = attrtype; - nla->nla_len = nla_attr_size(attrlen); - - memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); - - return nla; -} - -/** - * __nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the payload. - */ -void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - void *start; - - start = skb_put(skb, NLA_ALIGN(attrlen)); - memset(start, 0, NLA_ALIGN(attrlen)); - - return start; -} - -/** - * nla_reserve - reserve room for attribute on the skb - * @skb: socket buffer to reserve room on - * @attrtype: attribute type - * @attrlen: length of attribute payload - * - * Adds a netlink attribute header to a socket buffer and reserves - * room for the payload but does not copy it. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute header and payload. - */ -struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return NULL; - - return __nla_reserve(skb, attrtype, attrlen); -} - -/** - * nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return NULL; - - return __nla_reserve_nohdr(skb, attrlen); -} - -/** - * __nla_put - Add a netlink attribute to a socket buffer - * @skb: socket buffer to add attribute to - * @attrtype: attribute type - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute header and payload. - */ -void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, - const void *data) -{ - struct nlattr *nla; - - nla = __nla_reserve(skb, attrtype, attrlen); - memcpy(nla_data(nla), data, attrlen); -} - -/** - * __nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute payload. - */ -void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - void *start; - - start = __nla_reserve_nohdr(skb, attrlen); - memcpy(start, data, attrlen); -} - -/** - * nla_put - Add a netlink attribute to a socket buffer - * @skb: socket buffer to add attribute to - * @attrtype: attribute type - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute header and payload. - */ -int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -EMSGSIZE; - - __nla_put(skb, attrtype, attrlen, data); - return 0; -} - -/** - * nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -EMSGSIZE; - - __nla_put_nohdr(skb, attrlen, data); - return 0; -} - -/** - * nla_append - Add a netlink attribute without header or padding - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_append(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -EMSGSIZE; - - memcpy(skb_put(skb, attrlen), data, attrlen); - return 0; -} - -EXPORT_SYMBOL(nla_validate); -EXPORT_SYMBOL(nla_parse); -EXPORT_SYMBOL(nla_find); -EXPORT_SYMBOL(nla_strlcpy); -EXPORT_SYMBOL(__nla_reserve); -EXPORT_SYMBOL(__nla_reserve_nohdr); -EXPORT_SYMBOL(nla_reserve); -EXPORT_SYMBOL(nla_reserve_nohdr); -EXPORT_SYMBOL(__nla_put); -EXPORT_SYMBOL(__nla_put_nohdr); -EXPORT_SYMBOL(nla_put); -EXPORT_SYMBOL(nla_put_nohdr); -EXPORT_SYMBOL(nla_memcpy); -EXPORT_SYMBOL(nla_memcmp); -EXPORT_SYMBOL(nla_strcmp); -EXPORT_SYMBOL(nla_append); -- cgit v1.2.3-59-g8ed1b From 1075414b06109a99b0e87601e84c74a95bd45681 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Mar 2009 12:27:27 +0100 Subject: lockdep: require framepointers for x86 Require framepointers for x86, because otherwise we'll be having empty stack traces, which is useless. Signed-off-by: Peter Zijlstra LKML-Reference: <1236167295.5330.7240.camel@laptop> Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 29044f500269..9b4efb14a497 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -402,7 +402,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !X86 && !MIPS && !PPC + select FRAME_POINTER if !MIPS && !PPC select KALLSYMS select KALLSYMS_ALL -- cgit v1.2.3-59-g8ed1b From 5ee00bd4691e7364bb7b62e2068d473cd5cb9320 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 12:14:24 +0100 Subject: dma-debug: add Kconfig entry Impact: add a Kconfig entry for DMA-API debugging Signed-off-by: Joerg Roedel --- arch/Kconfig | 2 ++ lib/Kconfig.debug | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'lib') diff --git a/arch/Kconfig b/arch/Kconfig index 550dab22daa1..830c16a2b801 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -106,3 +106,5 @@ config HAVE_CLK The calls support software clock gating and thus are a key power management tool on many systems. +config HAVE_DMA_API_DEBUG + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1bcf9cd4baa0..d9cbada7e2f8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -902,6 +902,17 @@ config DYNAMIC_PRINTK_DEBUG debugging for all modules. This mode can be turned off via the above disable command. +config DMA_API_DEBUG + bool "Enable debugging of DMA-API usage" + depends on HAVE_DMA_API_DEBUG + help + Enable this option to debug the use of the DMA API by device drivers. + With this option you will be able to detect common bugs in device + drivers like double-freeing of DMA mappings or freeing mappings that + were never allocated. + This option causes a performance degredation. Use only if you want + to debug device drivers. If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" -- cgit v1.2.3-59-g8ed1b From f2f45e5f3c921c73c913e9a9c00f21ec01c86b4d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 12:19:52 +0100 Subject: dma-debug: add header file and core data structures Impact: add groundwork for DMA-API debugging Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 25 +++++++++++++++++++++++++ lib/Makefile | 2 ++ lib/dma-debug.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 include/linux/dma-debug.h create mode 100644 lib/dma-debug.c (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h new file mode 100644 index 000000000000..ce4ace707c7b --- /dev/null +++ b/include/linux/dma-debug.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __DMA_DEBUG_H +#define __DMA_DEBUG_H + +struct device; + +#endif /* __DMA_DEBUG_H */ diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded27..50b48cf63e4a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -84,6 +84,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dma-debug.c b/lib/dma-debug.c new file mode 100644 index 000000000000..31099712328c --- /dev/null +++ b/lib/dma-debug.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +enum { + dma_debug_single, + dma_debug_page, + dma_debug_sg, + dma_debug_coherent, +}; + +struct dma_debug_entry { + struct list_head list; + struct device *dev; + int type; + phys_addr_t paddr; + u64 dev_addr; + u64 size; + int direction; + int sg_call_ents; + int sg_mapped_ents; +}; + -- cgit v1.2.3-59-g8ed1b From 30dfa90cc8c4c9621d8d5aa9499f3a5df3376307 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 12:34:49 +0100 Subject: dma-debug: add hash functions for dma_debug_entries Impact: implement necessary functions for the core hash Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 31099712328c..5ff7d2e2b60e 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -18,9 +18,14 @@ */ #include +#include #include #include +#define HASH_SIZE 1024ULL +#define HASH_FN_SHIFT 13 +#define HASH_FN_MASK (HASH_SIZE - 1) + enum { dma_debug_single, dma_debug_page, @@ -40,3 +45,99 @@ struct dma_debug_entry { int sg_mapped_ents; }; +struct hash_bucket { + struct list_head list; + spinlock_t lock; +} __cacheline_aligned_in_smp; + +/* Hash list to save the allocated dma addresses */ +static struct hash_bucket dma_entry_hash[HASH_SIZE]; + +/* + * Hash related functions + * + * Every DMA-API request is saved into a struct dma_debug_entry. To + * have quick access to these structs they are stored into a hash. + */ +static int hash_fn(struct dma_debug_entry *entry) +{ + /* + * Hash function is based on the dma address. + * We use bits 20-27 here as the index into the hash + */ + return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; +} + +/* + * Request exclusive access to a hash bucket for a given dma_debug_entry. + */ +static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, + unsigned long *flags) +{ + int idx = hash_fn(entry); + unsigned long __flags; + + spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); + *flags = __flags; + return &dma_entry_hash[idx]; +} + +/* + * Give up exclusive access to the hash bucket + */ +static void put_hash_bucket(struct hash_bucket *bucket, + unsigned long *flags) +{ + unsigned long __flags = *flags; + + spin_unlock_irqrestore(&bucket->lock, __flags); +} + +/* + * Search a given entry in the hash bucket list + */ +static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, + struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + + list_for_each_entry(entry, &bucket->list, list) { + if ((entry->dev_addr == ref->dev_addr) && + (entry->dev == ref->dev)) + return entry; + } + + return NULL; +} + +/* + * Add an entry to a hash bucket + */ +static void hash_bucket_add(struct hash_bucket *bucket, + struct dma_debug_entry *entry) +{ + list_add_tail(&entry->list, &bucket->list); +} + +/* + * Remove entry from a hash bucket list + */ +static void hash_bucket_del(struct dma_debug_entry *entry) +{ + list_del(&entry->list); +} + +/* + * Wrapper function for adding an entry to the hash. + * This function takes care of locking itself. + */ +static void add_dma_entry(struct dma_debug_entry *entry) +{ + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(entry, &flags); + hash_bucket_add(bucket, entry); + put_hash_bucket(bucket, &flags); +} + -- cgit v1.2.3-59-g8ed1b From 3b1e79ed734f58ac41ca0a287ff03ca355f120ad Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 12:42:46 +0100 Subject: dma-debug: add allocator code Impact: add allocator code for struct dma_debug_entry Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 5ff7d2e2b60e..b60914669656 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -52,6 +52,16 @@ struct hash_bucket { /* Hash list to save the allocated dma addresses */ static struct hash_bucket dma_entry_hash[HASH_SIZE]; +/* List of pre-allocated dma_debug_entry's */ +static LIST_HEAD(free_entries); +/* Lock for the list above */ +static DEFINE_SPINLOCK(free_entries_lock); + +/* Global disable flag - will be set in case of an error */ +static bool global_disable __read_mostly; + +static u32 num_free_entries; +static u32 min_free_entries; /* * Hash related functions @@ -141,3 +151,50 @@ static void add_dma_entry(struct dma_debug_entry *entry) put_hash_bucket(bucket, &flags); } +/* struct dma_entry allocator + * + * The next two functions implement the allocator for + * struct dma_debug_entries. + */ +static struct dma_debug_entry *dma_entry_alloc(void) +{ + struct dma_debug_entry *entry = NULL; + unsigned long flags; + + spin_lock_irqsave(&free_entries_lock, flags); + + if (list_empty(&free_entries)) { + printk(KERN_ERR "DMA-API: debugging out of memory " + "- disabling\n"); + global_disable = true; + goto out; + } + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + +out: + spin_unlock_irqrestore(&free_entries_lock, flags); + + return entry; +} + +static void dma_entry_free(struct dma_debug_entry *entry) +{ + unsigned long flags; + + /* + * add to beginning of the list - this way the entries are + * more likely cache hot when they are reallocated. + */ + spin_lock_irqsave(&free_entries_lock, flags); + list_add(&entry->list, &free_entries); + num_free_entries += 1; + spin_unlock_irqrestore(&free_entries_lock, flags); +} + -- cgit v1.2.3-59-g8ed1b From 6bf078715c1998d4d10716251cc10ce45908594c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 12:54:42 +0100 Subject: dma-debug: add initialization code Impact: add code to initialize dma-debug core data structures Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 14 ++++++++++ lib/dma-debug.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index ce4ace707c7b..345d5387a30d 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -20,6 +20,20 @@ #ifndef __DMA_DEBUG_H #define __DMA_DEBUG_H +#include + struct device; +#ifdef CONFIG_DMA_API_DEBUG + +extern void dma_debug_init(u32 num_entries); + +#else /* CONFIG_DMA_API_DEBUG */ + +static inline void dma_debug_init(u32 num_entries) +{ +} + +#endif /* CONFIG_DMA_API_DEBUG */ + #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index b60914669656..5b50bb31f7c6 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -21,6 +21,7 @@ #include #include #include +#include #define HASH_SIZE 1024ULL #define HASH_FN_SHIFT 13 @@ -198,3 +199,68 @@ static void dma_entry_free(struct dma_debug_entry *entry) spin_unlock_irqrestore(&free_entries_lock, flags); } +/* + * DMA-API debugging init code + * + * The init code does two things: + * 1. Initialize core data structures + * 2. Preallocate a given number of dma_debug_entry structs + */ + +static int prealloc_memory(u32 num_entries) +{ + struct dma_debug_entry *entry, *next_entry; + int i; + + for (i = 0; i < num_entries; ++i) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_err; + + list_add_tail(&entry->list, &free_entries); + } + + num_free_entries = num_entries; + min_free_entries = num_entries; + + printk(KERN_INFO "DMA-API: preallocated %d debug entries\n", + num_entries); + + return 0; + +out_err: + + list_for_each_entry_safe(entry, next_entry, &free_entries, list) { + list_del(&entry->list); + kfree(entry); + } + + return -ENOMEM; +} + +/* + * Let the architectures decide how many entries should be preallocated. + */ +void dma_debug_init(u32 num_entries) +{ + int i; + + if (global_disable) + return; + + for (i = 0; i < HASH_SIZE; ++i) { + INIT_LIST_HEAD(&dma_entry_hash[i].list); + dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; + } + + if (prealloc_memory(num_entries) != 0) { + printk(KERN_ERR "DMA-API: debugging out of memory error " + "- disabled\n"); + global_disable = true; + + return; + } + + printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); +} + -- cgit v1.2.3-59-g8ed1b From 59d3daafa17265f01149df8eab3fb69b9b42cb2e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 13:01:56 +0100 Subject: dma-debug: add kernel command line parameters Impact: add dma_debug= and dma_debug_entries= kernel parameters Signed-off-by: Joerg Roedel --- Documentation/kernel-parameters.txt | 10 ++++++++++ lib/dma-debug.c | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'lib') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 54f21a5c262b..0fa3c0545994 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -491,6 +491,16 @@ and is between 256 and 4096 characters. It is defined in the file Range: 0 - 8192 Default: 64 + dma_debug=off If the kernel is compiled with DMA_API_DEBUG support + this option disables the debugging code at boot. + + dma_debug_entries= + This option allows to tune the number of preallocated + entries for DMA-API debugging code. One entry is + required per DMA-API allocation. Use this if the + DMA-API debugging code disables itself because the + architectural default is too low. + hpet= [X86-32,HPET] option to control HPET usage Format: { enable (default) | disable | force } disable: disable HPET and use PIT instead diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 5b50bb31f7c6..2ede46308024 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -64,6 +64,9 @@ static bool global_disable __read_mostly; static u32 num_free_entries; static u32 min_free_entries; +/* number of preallocated entries requested by kernel cmdline */ +static u32 req_entries; + /* * Hash related functions * @@ -253,6 +256,9 @@ void dma_debug_init(u32 num_entries) dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; } + if (req_entries) + num_entries = req_entries; + if (prealloc_memory(num_entries) != 0) { printk(KERN_ERR "DMA-API: debugging out of memory error " "- disabled\n"); @@ -264,3 +270,35 @@ void dma_debug_init(u32 num_entries) printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); } +static __init int dma_debug_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (strncmp(str, "off", 3) == 0) { + printk(KERN_INFO "DMA-API: debugging disabled on kernel " + "command line\n"); + global_disable = true; + } + + return 0; +} + +static __init int dma_debug_entries_cmdline(char *str) +{ + int res; + + if (!str) + return -EINVAL; + + res = get_option(&str, &req_entries); + + if (!res) + req_entries = 0; + + return 0; +} + +__setup("dma_debug=", dma_debug_cmdline); +__setup("dma_debug_entries=", dma_debug_entries_cmdline); + -- cgit v1.2.3-59-g8ed1b From 788dcfa6f17424695823152890d30da09f62f9c3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 13:13:27 +0100 Subject: dma-debug: add debugfs interface Impact: add debugfs interface for configuring DMA-API debugging Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 2ede46308024..20d6cdbcacdc 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -61,12 +62,29 @@ static DEFINE_SPINLOCK(free_entries_lock); /* Global disable flag - will be set in case of an error */ static bool global_disable __read_mostly; +/* Global error count */ +static u32 error_count; + +/* Global error show enable*/ +static u32 show_all_errors __read_mostly; +/* Number of errors to show */ +static u32 show_num_errors = 1; + static u32 num_free_entries; static u32 min_free_entries; /* number of preallocated entries requested by kernel cmdline */ static u32 req_entries; +/* debugfs dentry's for the stuff above */ +static struct dentry *dma_debug_dent __read_mostly; +static struct dentry *global_disable_dent __read_mostly; +static struct dentry *error_count_dent __read_mostly; +static struct dentry *show_all_errors_dent __read_mostly; +static struct dentry *show_num_errors_dent __read_mostly; +static struct dentry *num_free_entries_dent __read_mostly; +static struct dentry *min_free_entries_dent __read_mostly; + /* * Hash related functions * @@ -241,6 +259,58 @@ out_err: return -ENOMEM; } +static int dma_debug_fs_init(void) +{ + dma_debug_dent = debugfs_create_dir("dma-api", NULL); + if (!dma_debug_dent) { + printk(KERN_ERR "DMA-API: can not create debugfs directory\n"); + return -ENOMEM; + } + + global_disable_dent = debugfs_create_bool("disabled", 0444, + dma_debug_dent, + (u32 *)&global_disable); + if (!global_disable_dent) + goto out_err; + + error_count_dent = debugfs_create_u32("error_count", 0444, + dma_debug_dent, &error_count); + if (!error_count_dent) + goto out_err; + + show_all_errors_dent = debugfs_create_u32("all_errors", 0644, + dma_debug_dent, + &show_all_errors); + if (!show_all_errors_dent) + goto out_err; + + show_num_errors_dent = debugfs_create_u32("num_errors", 0644, + dma_debug_dent, + &show_num_errors); + if (!show_num_errors_dent) + goto out_err; + + num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, + dma_debug_dent, + &num_free_entries); + if (!num_free_entries_dent) + goto out_err; + + min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, + dma_debug_dent, + &min_free_entries); + if (!min_free_entries_dent) + goto out_err; + + return 0; + +out_err: + debugfs_remove_recursive(dma_debug_dent); + + return -ENOMEM; +} + + /* * Let the architectures decide how many entries should be preallocated. */ @@ -256,6 +326,14 @@ void dma_debug_init(u32 num_entries) dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; } + if (dma_debug_fs_init() != 0) { + printk(KERN_ERR "DMA-API: error creating debugfs entries " + "- disabling\n"); + global_disable = true; + + return; + } + if (req_entries) num_entries = req_entries; -- cgit v1.2.3-59-g8ed1b From 2d62ece14fe04168a7d16688ddd2d17ac472268c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 14:10:26 +0100 Subject: dma-debug: add core checking functions Impact: add functions to check on dma unmap and sync Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 20d6cdbcacdc..d0cb47a4211e 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -17,10 +17,13 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include +#include #include +#include #include #include @@ -50,7 +53,7 @@ struct dma_debug_entry { struct hash_bucket { struct list_head list; spinlock_t lock; -} __cacheline_aligned_in_smp; +} ____cacheline_aligned_in_smp; /* Hash list to save the allocated dma addresses */ static struct hash_bucket dma_entry_hash[HASH_SIZE]; @@ -85,6 +88,36 @@ static struct dentry *show_num_errors_dent __read_mostly; static struct dentry *num_free_entries_dent __read_mostly; static struct dentry *min_free_entries_dent __read_mostly; +static const char *type2name[4] = { "single", "page", + "scather-gather", "coherent" }; + +static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", + "DMA_FROM_DEVICE", "DMA_NONE" }; + +/* + * The access to some variables in this macro is racy. We can't use atomic_t + * here because all these variables are exported to debugfs. Some of them even + * writeable. This is also the reason why a lock won't help much. But anyway, + * the races are no big deal. Here is why: + * + * error_count: the addition is racy, but the worst thing that can happen is + * that we don't count some errors + * show_num_errors: the subtraction is racy. Also no big deal because in + * worst case this will result in one warning more in the + * system log than the user configured. This variable is + * writeable via debugfs. + */ +#define err_printk(dev, format, arg...) do { \ + error_count += 1; \ + if (show_all_errors || show_num_errors > 0) { \ + WARN(1, "%s %s: " format, \ + dev_driver_string(dev), \ + dev_name(dev) , ## arg); \ + } \ + if (!show_all_errors && show_num_errors > 0) \ + show_num_errors -= 1; \ + } while (0); + /* * Hash related functions * @@ -380,3 +413,156 @@ static __init int dma_debug_entries_cmdline(char *str) __setup("dma_debug=", dma_debug_cmdline); __setup("dma_debug_entries=", dma_debug_entries_cmdline); +static void check_unmap(struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (dma_mapping_error(ref->dev, ref->dev_addr)) + return; + + bucket = get_hash_bucket(ref, &flags); + entry = hash_bucket_find(bucket, ref); + + if (!entry) { + err_printk(ref->dev, "DMA-API: device driver tries " + "to free DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); + goto out; + } + + if (ref->size != entry->size) { + err_printk(ref->dev, "DMA-API: device driver frees " + "DMA memory with different size " + "[device address=0x%016llx] [map size=%llu bytes] " + "[unmap size=%llu bytes]\n", + ref->dev_addr, entry->size, ref->size); + } + + if (ref->type != entry->type) { + err_printk(ref->dev, "DMA-API: device driver frees " + "DMA memory with wrong function " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s] [unmapped as %s]\n", + ref->dev_addr, ref->size, + type2name[entry->type], type2name[ref->type]); + } else if ((entry->type == dma_debug_coherent) && + (ref->paddr != entry->paddr)) { + err_printk(ref->dev, "DMA-API: device driver frees " + "DMA memory with different CPU address " + "[device address=0x%016llx] [size=%llu bytes] " + "[cpu alloc address=%p] [cpu free address=%p]", + ref->dev_addr, ref->size, + (void *)entry->paddr, (void *)ref->paddr); + } + + if (ref->sg_call_ents && ref->type == dma_debug_sg && + ref->sg_call_ents != entry->sg_call_ents) { + err_printk(ref->dev, "DMA-API: device driver frees " + "DMA sg list with different entry count " + "[map count=%d] [unmap count=%d]\n", + entry->sg_call_ents, ref->sg_call_ents); + } + + /* + * This may be no bug in reality - but most implementations of the + * DMA API don't handle this properly, so check for it here + */ + if (ref->direction != entry->direction) { + err_printk(ref->dev, "DMA-API: device driver frees " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [unmapped with %s]\n", + ref->dev_addr, ref->size, + dir2name[entry->direction], + dir2name[ref->direction]); + } + + hash_bucket_del(entry); + dma_entry_free(entry); + +out: + put_hash_bucket(bucket, &flags); +} + +static void check_for_stack(struct device *dev, void *addr) +{ + if (object_is_on_stack(addr)) + err_printk(dev, "DMA-API: device driver maps memory from stack" + " [addr=%p]\n", addr); +} + +static void check_sync(struct device *dev, dma_addr_t addr, + u64 size, u64 offset, int direction, bool to_cpu) +{ + struct dma_debug_entry ref = { + .dev = dev, + .dev_addr = addr, + .size = size, + .direction = direction, + }; + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(&ref, &flags); + + entry = hash_bucket_find(bucket, &ref); + + if (!entry) { + err_printk(dev, "DMA-API: device driver tries " + "to sync DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + addr, size); + goto out; + } + + if ((offset + size) > entry->size) { + err_printk(dev, "DMA-API: device driver syncs" + " DMA memory outside allocated range " + "[device address=0x%016llx] " + "[allocation size=%llu bytes] [sync offset=%llu] " + "[sync size=%llu]\n", entry->dev_addr, entry->size, + offset, size); + } + + if (direction != entry->direction) { + err_printk(dev, "DMA-API: device driver syncs " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + addr, entry->size, + dir2name[entry->direction], + dir2name[direction]); + } + + if (entry->direction == DMA_BIDIRECTIONAL) + goto out; + + if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && + !(direction == DMA_TO_DEVICE)) + err_printk(dev, "DMA-API: device driver syncs " + "device read-only DMA memory for cpu " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + addr, entry->size, + dir2name[entry->direction], + dir2name[direction]); + + if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && + !(direction == DMA_FROM_DEVICE)) + err_printk(dev, "DMA-API: device driver syncs " + "device write-only DMA memory to device " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + addr, entry->size, + dir2name[entry->direction], + dir2name[direction]); + +out: + put_hash_bucket(bucket, &flags); + +} + -- cgit v1.2.3-59-g8ed1b From f62bc980e6fd26434012c0d5676ecb17179d9ee4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 14:14:49 +0100 Subject: dma-debug: add checking for map/unmap_page/single Impact: add debug callbacks for dma_{un}map_[page|single] Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 23 ++++++++++++++++++++ lib/dma-debug.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 345d5387a30d..65f73526ba2c 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -28,12 +28,35 @@ struct device; extern void dma_debug_init(u32 num_entries); +extern void debug_dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction, dma_addr_t dma_addr, + bool map_single); + +extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, bool map_single); + + #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_init(u32 num_entries) { } +static inline void debug_dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction, dma_addr_t dma_addr, + bool map_single) +{ +} + +static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, + bool map_single) +{ +} + + #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d0cb47a4211e..a2ed2b769685 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -566,3 +566,56 @@ out: } +void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, + size_t size, int direction, dma_addr_t dma_addr, + bool map_single) +{ + struct dma_debug_entry *entry; + + if (unlikely(global_disable)) + return; + + if (unlikely(dma_mapping_error(dev, dma_addr))) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->dev = dev; + entry->type = dma_debug_page; + entry->paddr = page_to_phys(page) + offset; + entry->dev_addr = dma_addr; + entry->size = size; + entry->direction = direction; + + if (map_single) { + entry->type = dma_debug_single; + check_for_stack(dev, page_address(page) + offset); + } + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_map_page); + +void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, bool map_single) +{ + struct dma_debug_entry ref = { + .type = dma_debug_page, + .dev = dev, + .dev_addr = addr, + .size = size, + .direction = direction, + }; + + if (unlikely(global_disable)) + return; + + if (map_single) + ref.type = dma_debug_single; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_unmap_page); + -- cgit v1.2.3-59-g8ed1b From 972aa45ceaf65376f33aa75958fcaefc9e752fa4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 14:19:54 +0100 Subject: dma-debug: add add checking for map/unmap_sg Impact: add debug callbacks for dma_{un}map_sg Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 16 +++++++++++ lib/dma-debug.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 65f73526ba2c..ee9fdb328549 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -23,6 +23,7 @@ #include struct device; +struct scatterlist; #ifdef CONFIG_DMA_API_DEBUG @@ -36,6 +37,11 @@ extern void debug_dma_map_page(struct device *dev, struct page *page, extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single); +extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction); + +extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir); #else /* CONFIG_DMA_API_DEBUG */ @@ -56,6 +62,16 @@ static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, { } +static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction) +{ +} + +static inline void debug_dma_unmap_sg(struct device *dev, + struct scatterlist *sglist, + int nelems, int dir) +{ +} #endif /* CONFIG_DMA_API_DEBUG */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index a2ed2b769685..26e40e93e0f2 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -619,3 +620,75 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, } EXPORT_SYMBOL(debug_dma_unmap_page); +void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction) +{ + struct dma_debug_entry *entry; + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, mapped_ents, i) { + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_sg; + entry->dev = dev; + entry->paddr = sg_phys(s); + entry->size = s->length; + entry->dev_addr = s->dma_address; + entry->direction = direction; + entry->sg_call_ents = nents; + entry->sg_mapped_ents = mapped_ents; + + check_for_stack(dev, sg_virt(s)); + + add_dma_entry(entry); + } +} +EXPORT_SYMBOL(debug_dma_map_sg); + +void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct dma_debug_entry *entry; + struct scatterlist *s; + int mapped_ents = 0, i; + unsigned long flags; + + if (unlikely(global_disable)) + return; + + for_each_sg(sglist, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .paddr = sg_phys(s), + .dev_addr = s->dma_address, + .size = s->length, + .direction = dir, + .sg_call_ents = 0, + }; + + if (mapped_ents && i >= mapped_ents) + break; + + if (mapped_ents == 0) { + struct hash_bucket *bucket; + ref.sg_call_ents = nelems; + bucket = get_hash_bucket(&ref, &flags); + entry = hash_bucket_find(bucket, &ref); + if (entry) + mapped_ents = entry->sg_mapped_ents; + put_hash_bucket(bucket, &flags); + } + + check_unmap(&ref); + } +} +EXPORT_SYMBOL(debug_dma_unmap_sg); + -- cgit v1.2.3-59-g8ed1b From 6bfd4498764d6201399849d2e80fda95db7742c0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 14:38:50 +0100 Subject: dma-debug: add checking for [alloc|free]_coherent Impact: add debug callbacks for dma_[alloc|free]_coherent Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 16 ++++++++++++++++ lib/dma-debug.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index ee9fdb328549..cb72dfd87326 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -43,6 +43,12 @@ extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir); +extern void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt); + +extern void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr); + #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_init(u32 num_entries) @@ -73,6 +79,16 @@ static inline void debug_dma_unmap_sg(struct device *dev, { } +static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt) +{ +} + +static inline void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ +} + #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 26e40e93e0f2..44af837f68ef 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -692,3 +692,48 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, } EXPORT_SYMBOL(debug_dma_unmap_sg); +void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt) +{ + struct dma_debug_entry *entry; + + if (unlikely(global_disable)) + return; + + if (unlikely(virt == NULL)) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_coherent; + entry->dev = dev; + entry->paddr = virt_to_phys(virt); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = DMA_BIDIRECTIONAL; + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_alloc_coherent); + +void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ + struct dma_debug_entry ref = { + .type = dma_debug_coherent, + .dev = dev, + .paddr = virt_to_phys(virt), + .dev_addr = addr, + .size = size, + .direction = DMA_BIDIRECTIONAL, + }; + + if (unlikely(global_disable)) + return; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_free_coherent); + -- cgit v1.2.3-59-g8ed1b From b9d2317e0c4aed02afd20022083b2a485289605d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 14:43:04 +0100 Subject: dma-debug: add checks for sync_single_* Impact: add debug callbacks for dma_sync_single_for_* functions Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 20 ++++++++++++++++++++ lib/dma-debug.c | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index cb72dfd87326..0eee7af5143f 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -49,6 +49,14 @@ extern void debug_dma_alloc_coherent(struct device *dev, size_t size, extern void debug_dma_free_coherent(struct device *dev, size_t size, void *virt, dma_addr_t addr); +extern void debug_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction); + +extern void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction); + #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_init(u32 num_entries) @@ -89,6 +97,18 @@ static inline void debug_dma_free_coherent(struct device *dev, size_t size, { } +static inline void debug_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction) +{ +} + +static inline void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction) +{ +} + #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 44af837f68ef..714cfb6ca0e4 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -737,3 +737,24 @@ void debug_dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(debug_dma_free_coherent); +void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, 0, direction, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); + +void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, 0, direction, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_device); + -- cgit v1.2.3-59-g8ed1b From 948408ba3e2a67ed0f95e18ed5be1c622c2c5fc3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 14:55:38 +0100 Subject: dma-debug: add checks for sync_single_range_* Impact: add debug callbacks for dma_sync_single_range_for_* functions Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 27 +++++++++++++++++++++++++++ lib/dma-debug.c | 24 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 0eee7af5143f..e9b903503adb 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -57,6 +57,17 @@ extern void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, int direction); +extern void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + int direction); + +extern void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, int direction); + #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_init(u32 num_entries) @@ -109,6 +120,22 @@ static inline void debug_dma_sync_single_for_device(struct device *dev, { } +static inline void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + int direction) +{ +} + +static inline void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, + int direction) +{ +} + #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 714cfb6ca0e4..d1c0ac1831b7 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -758,3 +758,27 @@ void debug_dma_sync_single_for_device(struct device *dev, } EXPORT_SYMBOL(debug_dma_sync_single_for_device); +void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, size_t size, + int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, offset, direction, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); + +void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, int direction) +{ + if (unlikely(global_disable)) + return; + + check_sync(dev, dma_handle, size, offset, direction, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); + -- cgit v1.2.3-59-g8ed1b From a31fba5d68cebf8f5fefd03e079dab94875e25f5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Jan 2009 15:01:12 +0100 Subject: dma-debug: add checks for sync_single_sg_* Impact: add debug callbacks for dma_sync_sg_* functions Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 20 ++++++++++++++++++++ lib/dma-debug.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index e9b903503adb..4985c6c5237e 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -68,6 +68,14 @@ extern void debug_dma_sync_single_range_for_device(struct device *dev, unsigned long offset, size_t size, int direction); +extern void debug_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nelems, int direction); + +extern void debug_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nelems, int direction); + #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_init(u32 num_entries) @@ -136,6 +144,18 @@ static inline void debug_dma_sync_single_range_for_device(struct device *dev, { } +static inline void debug_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nelems, int direction) +{ +} + +static inline void debug_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nelems, int direction) +{ +} + #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d1c0ac1831b7..9d11e89c2ee2 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -782,3 +782,35 @@ void debug_dma_sync_single_range_for_device(struct device *dev, } EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); +void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, nelems, i) { + check_sync(dev, s->dma_address, s->dma_length, 0, + direction, true); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); + +void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, nelems, i) { + check_sync(dev, s->dma_address, s->dma_length, 0, + direction, false); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_device); + -- cgit v1.2.3-59-g8ed1b From 5ed0cec0ac5f1b3759bdbe4d9df32ee4ff8afb5a Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 19:40:20 +0800 Subject: sched: TIF_NEED_RESCHED -> need_reshed() cleanup Impact: cleanup Use test_tsk_need_resched(), set_tsk_need_resched(), need_resched() instead of using TIF_NEED_RESCHED. Signed-off-by: Lai Jiangshan Cc: Peter Zijlstra LKML-Reference: <49B10BA4.9070209@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 10 +++++----- lib/kernel_lock.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/kernel/sched.c b/kernel/sched.c index 8b92f40c147d..e0fa739a441b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1189,10 +1189,10 @@ static void resched_task(struct task_struct *p) assert_spin_locked(&task_rq(p)->lock); - if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) + if (test_tsk_need_resched(p)) return; - set_tsk_thread_flag(p, TIF_NEED_RESCHED); + set_tsk_need_resched(p); cpu = task_cpu(p); if (cpu == smp_processor_id()) @@ -1248,7 +1248,7 @@ void wake_up_idle_cpu(int cpu) * lockless. The worst case is that the other CPU runs the * idle task through an additional NOOP schedule() */ - set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED); + set_tsk_need_resched(rq->idle); /* NEED_RESCHED must be visible before we test polling */ smp_mb(); @@ -4740,7 +4740,7 @@ asmlinkage void __sched preempt_schedule(void) * between schedule and now. */ barrier(); - } while (unlikely(test_thread_flag(TIF_NEED_RESCHED))); + } while (need_resched()); } EXPORT_SYMBOL(preempt_schedule); @@ -4769,7 +4769,7 @@ asmlinkage void __sched preempt_schedule_irq(void) * between schedule and now. */ barrier(); - } while (unlikely(test_thread_flag(TIF_NEED_RESCHED))); + } while (need_resched()); } #endif /* CONFIG_PREEMPT */ diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 01a3c22c1b5a..39f1029e3525 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -39,7 +39,7 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); int __lockfunc __reacquire_kernel_lock(void) { while (!_raw_spin_trylock(&kernel_flag)) { - if (test_thread_flag(TIF_NEED_RESCHED)) + if (need_resched()) return -EAGAIN; cpu_relax(); } -- cgit v1.2.3-59-g8ed1b From 4370aa4aa75391a5e2e06bccb0919109f725ed8e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:46 +0100 Subject: vsprintf: add binary printf Impact: add new APIs for binary trace printk infrastructure vbin_printf(): write args to binary buffer, string is copied when "%s" is occurred. bstr_printf(): read from binary buffer for args and format a string [fweisbec@gmail.com: rebase] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds LKML-Reference: <1236356510-8381-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/string.h | 7 + lib/Kconfig | 3 + lib/vsprintf.c | 442 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 452 insertions(+) (limited to 'lib') diff --git a/include/linux/string.h b/include/linux/string.h index d18fc198aa2f..27ac31784ad2 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -10,6 +10,7 @@ #include /* for inline */ #include /* for size_t */ #include /* for NULL */ +#include extern char *strndup_user(const char __user *, long); @@ -111,6 +112,12 @@ extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +#ifdef CONFIG_BINARY_PRINTF +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); +int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); +int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); +#endif + extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available); diff --git a/lib/Kconfig b/lib/Kconfig index 03c2c24b9083..97d62cf091a7 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -2,6 +2,9 @@ # Library configuration # +config BINARY_PRINTF + def_bool n + menu "Library routines" config BITREVERSE diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 0fbd0121d91d..3543bbe8b1bc 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1058,6 +1058,448 @@ int sprintf(char * buf, const char *fmt, ...) } EXPORT_SYMBOL(sprintf); +#ifdef CONFIG_BINARY_PRINTF +/* + * bprintf service: + * vbin_printf() - VA arguments to binary data + * bstr_printf() - Binary data to text string + */ + +/** + * vbin_printf - Parse a format string and place args' binary value in a buffer + * @bin_buf: The buffer to place args' binary value + * @size: The size of the buffer(by words(32bits), not characters) + * @fmt: The format string to use + * @args: Arguments for the format string + * + * The format follows C99 vsnprintf, except %n is ignored, and its argument + * is skiped. + * + * The return value is the number of words(32bits) which would be generated for + * the given input. + * + * NOTE: + * If the return value is greater than @size, the resulting bin_buf is NOT + * valid for bstr_printf(). + */ +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +{ + char *str, *end; + int qualifier; + + str = (char *)bin_buf; + end = (char *)(bin_buf + size); + +#define save_arg(type) \ +do { \ + if (sizeof(type) == 8) { \ + unsigned long long value; \ + str = PTR_ALIGN(str, sizeof(u32)); \ + value = va_arg(args, unsigned long long); \ + if (str + sizeof(type) <= end) { \ + *(u32 *)str = *(u32 *)&value; \ + *(u32 *)(str + 4) = *((u32 *)&value + 1); \ + } \ + } else { \ + unsigned long value; \ + str = PTR_ALIGN(str, sizeof(type)); \ + value = va_arg(args, int); \ + if (str + sizeof(type) <= end) \ + *(typeof(type) *)str = (type)value; \ + } \ + str += sizeof(type); \ +} while (0) + + for (; *fmt ; ++fmt) { + if (*fmt != '%') + continue; + +repeat: + /* parse flags */ + ++fmt; /* this also skips first '%' */ + if (*fmt == '-' || *fmt == '+' || *fmt == ' ' + || *fmt == '#' || *fmt == '0') + goto repeat; + + /* parse field width */ + if (isdigit(*fmt)) + skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + save_arg(int); + } + + /* parse the precision */ + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + save_arg(int); + } + } + + /* parse the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } + } + + /* parse format type */ + switch (*fmt) { + case 'c': + save_arg(char); + continue; + case 's': { + /* save the string argument */ + const char *save_str = va_arg(args, char *); + size_t len; + if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE + || (unsigned long)save_str < PAGE_SIZE) + save_str = ""; + len = strlen(save_str); + if (str + len + 1 < end) + memcpy(str, save_str, len + 1); + str += len + 1; + continue; + } + case 'p': + save_arg(void *); + /* skip all alphanumeric pointer suffixes */ + while (isalnum(fmt[1])) + fmt++; + continue; + case 'n': { + /* skip %n 's argument */ + void *skip_arg; + if (qualifier == 'l') + skip_arg = va_arg(args, long *); + else if (qualifier == 'Z' || qualifier == 'z') + skip_arg = va_arg(args, size_t *); + else + skip_arg = va_arg(args, int *); + continue; + } + case 'o': + case 'x': + case 'X': + case 'd': + case 'i': + case 'u': + /* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */ + if (qualifier == 'L') + save_arg(long long); + else if (qualifier == 'l') + save_arg(unsigned long); + else if (qualifier == 'Z' || qualifier == 'z') + save_arg(size_t); + else if (qualifier == 't') + save_arg(ptrdiff_t); + else if (qualifier == 'h') + save_arg(short); + else + save_arg(int); + continue; + default: + if (!*fmt) + --fmt; + continue; + } + } +#undef save_arg + + return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; +} +EXPORT_SYMBOL_GPL(vbin_printf); + +/** + * bstr_printf - Format a string from binary arguments and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @bin_buf: Binary arguments for the format string + * + * This function like C99 vsnprintf, but the difference is that vsnprintf gets + * arguments from stack, and bstr_printf gets arguments from @bin_buf which is + * a binary buffer that generated by vbin_printf. + * + * The format follows C99 vsnprintf, but has some extensions: + * %pS output the name of a text symbol + * %pF output the name of a function pointer + * %pR output the address range in a struct resource + * %n is ignored + * + * The return value is the number of characters which would + * be generated for the given input, excluding the trailing + * '\0', as per ISO C99. If you want to have the exact + * number of characters written into @buf as return value + * (not including the trailing '\0'), use vscnprintf(). If the + * return is greater than or equal to @size, the resulting + * string is truncated. + */ +int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +{ + unsigned long long num; + int base; + char *str, *end, c; + const char *args = (const char *)bin_buf; + + int flags; + int field_width; + int precision; + int qualifier; + + if (unlikely((int) size < 0)) { + /* There can be only one.. */ + static char warn = 1; + WARN_ON(warn); + warn = 0; + return 0; + } + + str = buf; + end = buf + size; + +#define get_arg(type) \ +({ \ + typeof(type) value; \ + if (sizeof(type) == 8) { \ + args = PTR_ALIGN(args, sizeof(u32)); \ + *(u32 *)&value = *(u32 *)args; \ + *((u32 *)&value + 1) = *(u32 *)(args + 4); \ + } else { \ + args = PTR_ALIGN(args, sizeof(type)); \ + value = *(typeof(type) *)args; \ + } \ + args += sizeof(type); \ + value; \ +}) + + /* Make sure end is always >= buf */ + if (end < buf) { + end = ((void *)-1); + size = end - buf; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str < end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; +repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = get_arg(int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = get_arg(int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) { + while (--field_width > 0) { + if (str < end) + *str = ' '; + ++str; + } + } + c = (unsigned char) get_arg(char); + if (str < end) + *str = c; + ++str; + while (--field_width > 0) { + if (str < end) + *str = ' '; + ++str; + } + continue; + + case 's':{ + const char *str_arg = args; + size_t len = strlen(str_arg); + args += len + 1; + str = string(str, end, (char *)str_arg, field_width, + precision, flags); + continue; + } + + case 'p': + str = pointer(fmt+1, str, end, get_arg(void *), + field_width, precision, flags); + /* Skip all alphanumeric pointer suffixes */ + while (isalnum(fmt[1])) + fmt++; + continue; + + case 'n': + /* skip %n */ + continue; + + case '%': + if (str < end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'x': + flags |= SMALL; + case 'X': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + if (str < end) + *str = '%'; + ++str; + if (*fmt) { + if (str < end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = get_arg(long long); + else if (qualifier == 'l') { + num = get_arg(unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z' || qualifier == 'z') { + num = get_arg(size_t); + } else if (qualifier == 't') { + num = get_arg(ptrdiff_t); + } else if (qualifier == 'h') { + num = (unsigned short) get_arg(short); + if (flags & SIGN) + num = (signed short) num; + } else { + num = get_arg(unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, end, num, base, + field_width, precision, flags); + } + if (size > 0) { + if (str < end) + *str = '\0'; + else + end[-1] = '\0'; + } +#undef get_arg + + /* the trailing null byte doesn't count towards the total */ + return str - buf; +} +EXPORT_SYMBOL_GPL(bstr_printf); + +/** + * bprintf - Parse a format string and place args' binary value in a buffer + * @bin_buf: The buffer to place args' binary value + * @size: The size of the buffer(by words(32bits), not characters) + * @fmt: The format string to use + * @...: Arguments for the format string + * + * The function returns the number of words(u32) written + * into @bin_buf. + */ +int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vbin_printf(bin_buf, size, fmt, args); + va_end(args); + return ret; +} +EXPORT_SYMBOL_GPL(bprintf); + +#endif /* CONFIG_BINARY_PRINTF */ + /** * vsscanf - Unformat a buffer into a list of arguments * @buf: input buffer -- cgit v1.2.3-59-g8ed1b From fef20d9c1380f04ba9492d6463148db07b413708 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Mar 2009 17:21:50 +0100 Subject: vsprintf: unify the format decoding layer for its 3 users An new optimization is making its way to ftrace. Its purpose is to make trace_printk() consuming less memory and be faster. Written by Lai Jiangshan, the approach is to delay the formatting job from tracing time to output time. Currently, a call to trace_printk() will format the whole string and insert it into the ring buffer. Then you can read it on /debug/tracing/trace file. The new implementation stores the address of the format string and the binary parameters into the ring buffer, making the packet more compact and faster to insert. Later, when the user exports the traces, the format string is retrieved with the binary parameters and the formatting job is eventually done. The new implementation rewrites a lot of format decoding bits from vsnprintf() function, making now 3 differents functions to maintain in their duplicated parts of printf format decoding bits. Suggested by Ingo Molnar, this patch tries to factorize the most possible common bits from these functions. The real common part between them is the format decoding. Although they do somewhat similar jobs, their way to export or import the parameters is very different. Thus, only the decoding layer is extracted, unless you see other parts that could be worth factorized. Changes in V2: - Address a suggestion from Linus to group the format_decode() parameters inside a structure. Changes in v3: - Address other cleanups suggested by Ingo and Linus such as passing the printf_spec struct to the format helpers: pointer()/number()/string() Note that this struct is passed by copy and not by address. This is to avoid side effects because these functions often change these values and the changes shoudn't be persistant when a callee helper returns. It would be too risky. - Various cleanups (code alignement, switch/case instead of if/else fountains). - Fix a bug that printed the first format specifier following a %p Changes in v4: - drop unapropriate const qualifier loss while casting fmt to a char * (thanks to Vegard Nossum for having pointed this out). Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-6-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- lib/vsprintf.c | 972 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 541 insertions(+), 431 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3543bbe8b1bc..25f01578c856 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -396,7 +396,38 @@ static noinline char* put_dec(char *buf, unsigned long long num) #define SMALL 32 /* Must be 32 == 0x20 */ #define SPECIAL 64 /* 0x */ -static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type) +enum format_type { + FORMAT_TYPE_NONE, /* Just a string part */ + FORMAT_TYPE_WITDH, + FORMAT_TYPE_PRECISION, + FORMAT_TYPE_CHAR, + FORMAT_TYPE_STR, + FORMAT_TYPE_PTR, + FORMAT_TYPE_PERCENT_CHAR, + FORMAT_TYPE_INVALID, + FORMAT_TYPE_LONG_LONG, + FORMAT_TYPE_ULONG, + FORMAT_TYPE_LONG, + FORMAT_TYPE_USHORT, + FORMAT_TYPE_SHORT, + FORMAT_TYPE_UINT, + FORMAT_TYPE_INT, + FORMAT_TYPE_NRCHARS, + FORMAT_TYPE_SIZE_T, + FORMAT_TYPE_PTRDIFF +}; + +struct printf_spec { + enum format_type type; + int flags; /* flags to number() */ + int field_width; /* width of output field */ + int base; + int precision; /* # of digits/chars */ + int qualifier; +}; + +static char *number(char *buf, char *end, unsigned long long num, + struct printf_spec spec) { /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ @@ -404,32 +435,32 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int char tmp[66]; char sign; char locase; - int need_pfx = ((type & SPECIAL) && base != 10); + int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); int i; /* locase = 0 or 0x20. ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ - locase = (type & SMALL); - if (type & LEFT) - type &= ~ZEROPAD; + locase = (spec.flags & SMALL); + if (spec.flags & LEFT) + spec.flags &= ~ZEROPAD; sign = 0; - if (type & SIGN) { + if (spec.flags & SIGN) { if ((signed long long) num < 0) { sign = '-'; num = - (signed long long) num; - size--; - } else if (type & PLUS) { + spec.field_width--; + } else if (spec.flags & PLUS) { sign = '+'; - size--; - } else if (type & SPACE) { + spec.field_width--; + } else if (spec.flags & SPACE) { sign = ' '; - size--; + spec.field_width--; } } if (need_pfx) { - size--; - if (base == 16) - size--; + spec.field_width--; + if (spec.base == 16) + spec.field_width--; } /* generate full string in tmp[], in reverse order */ @@ -441,10 +472,10 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int tmp[i++] = (digits[do_div(num,base)] | locase); } while (num != 0); */ - else if (base != 10) { /* 8 or 16 */ - int mask = base - 1; + else if (spec.base != 10) { /* 8 or 16 */ + int mask = spec.base - 1; int shift = 3; - if (base == 16) shift = 4; + if (spec.base == 16) shift = 4; do { tmp[i++] = (digits[((unsigned char)num) & mask] | locase); num >>= shift; @@ -454,12 +485,12 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int } /* printing 100 using %2d gives "100", not "00" */ - if (i > precision) - precision = i; + if (i > spec.precision) + spec.precision = i; /* leading space padding */ - size -= precision; - if (!(type & (ZEROPAD+LEFT))) { - while(--size >= 0) { + spec.field_width -= spec.precision; + if (!(spec.flags & (ZEROPAD+LEFT))) { + while(--spec.field_width >= 0) { if (buf < end) *buf = ' '; ++buf; @@ -476,23 +507,23 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int if (buf < end) *buf = '0'; ++buf; - if (base == 16) { + if (spec.base == 16) { if (buf < end) *buf = ('X' | locase); ++buf; } } /* zero or space padding */ - if (!(type & LEFT)) { - char c = (type & ZEROPAD) ? '0' : ' '; - while (--size >= 0) { + if (!(spec.flags & LEFT)) { + char c = (spec.flags & ZEROPAD) ? '0' : ' '; + while (--spec.field_width >= 0) { if (buf < end) *buf = c; ++buf; } } /* hmm even more zero padding? */ - while (i <= --precision) { + while (i <= --spec.precision) { if (buf < end) *buf = '0'; ++buf; @@ -504,7 +535,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int ++buf; } /* trailing space padding */ - while (--size >= 0) { + while (--spec.field_width >= 0) { if (buf < end) *buf = ' '; ++buf; @@ -512,17 +543,17 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int return buf; } -static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags) +static char *string(char *buf, char *end, char *s, struct printf_spec spec) { int len, i; if ((unsigned long)s < PAGE_SIZE) s = ""; - len = strnlen(s, precision); + len = strnlen(s, spec.precision); - if (!(flags & LEFT)) { - while (len < field_width--) { + if (!(spec.flags & LEFT)) { + while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; @@ -533,7 +564,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio *buf = *s; ++buf; ++s; } - while (len < field_width--) { + while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; @@ -541,21 +572,24 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio return buf; } -static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags) +static char *symbol_string(char *buf, char *end, void *ptr, + struct printf_spec spec) { unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; sprint_symbol(sym, value); - return string(buf, end, sym, field_width, precision, flags); + return string(buf, end, sym, spec); #else - field_width = 2*sizeof(void *); - flags |= SPECIAL | SMALL | ZEROPAD; - return number(buf, end, value, 16, field_width, precision, flags); + spec.field_width = 2*sizeof(void *); + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + return number(buf, end, value, spec); #endif } -static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags) +static char *resource_string(char *buf, char *end, struct resource *res, + struct printf_spec spec) { #ifndef IO_RSRC_PRINTK_SIZE #define IO_RSRC_PRINTK_SIZE 4 @@ -564,7 +598,11 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie #ifndef MEM_RSRC_PRINTK_SIZE #define MEM_RSRC_PRINTK_SIZE 8 #endif - + struct printf_spec num_spec = { + .base = 16, + .precision = -1, + .flags = SPECIAL | SMALL | ZEROPAD, + }; /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ char sym[4*sizeof(resource_size_t) + 8]; char *p = sym, *pend = sym + sizeof(sym); @@ -576,17 +614,18 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie size = MEM_RSRC_PRINTK_SIZE; *p++ = '['; - p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD); + num_spec.field_width = size; + p = number(p, pend, res->start, num_spec); *p++ = '-'; - p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD); + p = number(p, pend, res->end, num_spec); *p++ = ']'; *p = 0; - return string(buf, end, sym, field_width, precision, flags); + return string(buf, end, sym, spec); } -static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width, - int precision, int flags) +static char *mac_address_string(char *buf, char *end, u8 *addr, + struct printf_spec spec) { char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */ char *p = mac_addr; @@ -594,16 +633,17 @@ static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width, for (i = 0; i < 6; i++) { p = pack_hex_byte(p, addr[i]); - if (!(flags & SPECIAL) && i != 5) + if (!(spec.flags & SPECIAL) && i != 5) *p++ = ':'; } *p = '\0'; + spec.flags &= ~SPECIAL; - return string(buf, end, mac_addr, field_width, precision, flags & ~SPECIAL); + return string(buf, end, mac_addr, spec); } -static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width, - int precision, int flags) +static char *ip6_addr_string(char *buf, char *end, u8 *addr, + struct printf_spec spec) { char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */ char *p = ip6_addr; @@ -612,16 +652,17 @@ static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width, for (i = 0; i < 8; i++) { p = pack_hex_byte(p, addr[2 * i]); p = pack_hex_byte(p, addr[2 * i + 1]); - if (!(flags & SPECIAL) && i != 7) + if (!(spec.flags & SPECIAL) && i != 7) *p++ = ':'; } *p = '\0'; + spec.flags &= ~SPECIAL; - return string(buf, end, ip6_addr, field_width, precision, flags & ~SPECIAL); + return string(buf, end, ip6_addr, spec); } -static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width, - int precision, int flags) +static char *ip4_addr_string(char *buf, char *end, u8 *addr, + struct printf_spec spec) { char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */ char temp[3]; /* hold each IP quad in reverse order */ @@ -637,8 +678,9 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width, *p++ = '.'; } *p = '\0'; + spec.flags &= ~SPECIAL; - return string(buf, end, ip4_addr, field_width, precision, flags & ~SPECIAL); + return string(buf, end, ip4_addr, spec); } /* @@ -663,41 +705,234 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width, * function pointers are really function descriptors, which contain a * pointer to the real address. */ -static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) +static char *pointer(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) { if (!ptr) - return string(buf, end, "(null)", field_width, precision, flags); + return string(buf, end, "(null)", spec); switch (*fmt) { case 'F': ptr = dereference_function_descriptor(ptr); /* Fallthrough */ case 'S': - return symbol_string(buf, end, ptr, field_width, precision, flags); + return symbol_string(buf, end, ptr, spec); case 'R': - return resource_string(buf, end, ptr, field_width, precision, flags); + return resource_string(buf, end, ptr, spec); case 'm': - flags |= SPECIAL; + spec.flags |= SPECIAL; /* Fallthrough */ case 'M': - return mac_address_string(buf, end, ptr, field_width, precision, flags); + return mac_address_string(buf, end, ptr, spec); case 'i': - flags |= SPECIAL; + spec.flags |= SPECIAL; /* Fallthrough */ case 'I': if (fmt[1] == '6') - return ip6_addr_string(buf, end, ptr, field_width, precision, flags); + return ip6_addr_string(buf, end, ptr, spec); if (fmt[1] == '4') - return ip4_addr_string(buf, end, ptr, field_width, precision, flags); - flags &= ~SPECIAL; + return ip4_addr_string(buf, end, ptr, spec); + spec.flags &= ~SPECIAL; + break; + } + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2*sizeof(void *); + spec.flags |= ZEROPAD; + } + spec.base = 16; + + return number(buf, end, (unsigned long) ptr, spec); +} + +/* + * Helper function to decode printf style format. + * Each call decode a token from the format and return the + * number of characters read (or likely the delta where it wants + * to go on the next call). + * The decoded token is returned through the parameters + * + * 'h', 'l', or 'L' for integer fields + * 'z' support added 23/7/1999 S.H. + * 'z' changed to 'Z' --davidm 1/25/99 + * 't' added for ptrdiff_t + * + * @fmt: the format string + * @type of the token returned + * @flags: various flags such as +, -, # tokens.. + * @field_width: overwritten width + * @base: base of the number (octal, hex, ...) + * @precision: precision of a number + * @qualifier: qualifier of a number (long, size_t, ...) + */ +static int format_decode(const char *fmt, struct printf_spec *spec) +{ + const char *start = fmt; + bool sign = false; + + /* we finished early by reading the field width */ + if (spec->type == FORMAT_TYPE_WITDH) { + if (spec->field_width < 0) { + spec->field_width = -spec->field_width; + spec->flags |= LEFT; + } + spec->type = FORMAT_TYPE_NONE; + goto precision; + } + + /* we finished early by reading the precision */ + if (spec->type == FORMAT_TYPE_PRECISION) { + if (spec->precision < 0) + spec->precision = 0; + + spec->type = FORMAT_TYPE_NONE; + goto qualifier; + } + + /* By default */ + spec->type = FORMAT_TYPE_NONE; + + for (; *fmt ; ++fmt) { + if (*fmt == '%') + break; + } + + /* Return the current non-format string */ + if (fmt != start || !*fmt) + return fmt - start; + + /* Process flags */ + spec->flags = 0; + + while (1) { /* this also skips first '%' */ + bool found = true; + + ++fmt; + + switch (*fmt) { + case '-': spec->flags |= LEFT; break; + case '+': spec->flags |= PLUS; break; + case ' ': spec->flags |= SPACE; break; + case '#': spec->flags |= SPECIAL; break; + case '0': spec->flags |= ZEROPAD; break; + default: found = false; + } + + if (!found) + break; + } + + /* get field width */ + spec->field_width = -1; + + if (isdigit(*fmt)) + spec->field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + /* it's the next argument */ + spec->type = FORMAT_TYPE_WITDH; + return ++fmt - start; + } + +precision: + /* get the precision */ + spec->precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) { + spec->precision = skip_atoi(&fmt); + if (spec->precision < 0) + spec->precision = 0; + } else if (*fmt == '*') { + /* it's the next argument */ + spec->type = FORMAT_TYPE_WITDH; + return ++fmt - start; + } + } + +qualifier: + /* get the conversion qualifier */ + spec->qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { + spec->qualifier = *fmt; + ++fmt; + if (spec->qualifier == 'l' && *fmt == 'l') { + spec->qualifier = 'L'; + ++fmt; + } + } + + /* default base */ + spec->base = 10; + switch (*fmt) { + case 'c': + spec->type = FORMAT_TYPE_CHAR; + return ++fmt - start; + + case 's': + spec->type = FORMAT_TYPE_STR; + return ++fmt - start; + + case 'p': + spec->type = FORMAT_TYPE_PTR; + return fmt - start; + /* skip alnum */ + + case 'n': + spec->type = FORMAT_TYPE_NRCHARS; + return ++fmt - start; + + case '%': + spec->type = FORMAT_TYPE_PERCENT_CHAR; + return ++fmt - start; + + /* integer number formats - set up the flags and "break" */ + case 'o': + spec->base = 8; break; + + case 'x': + spec->flags |= SMALL; + + case 'X': + spec->base = 16; + break; + + case 'd': + case 'i': + sign = true; + case 'u': + break; + + default: + spec->type = FORMAT_TYPE_INVALID; + return fmt - start; } - flags |= SMALL; - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= ZEROPAD; + + if (spec->qualifier == 'L') + spec->type = FORMAT_TYPE_LONG_LONG; + else if (spec->qualifier == 'l') { + if (sign) + spec->type = FORMAT_TYPE_LONG; + else + spec->type = FORMAT_TYPE_ULONG; + } else if (spec->qualifier == 'Z' || spec->qualifier == 'z') { + spec->type = FORMAT_TYPE_SIZE_T; + } else if (spec->qualifier == 't') { + spec->type = FORMAT_TYPE_PTRDIFF; + } else if (spec->qualifier == 'h') { + if (sign) + spec->type = FORMAT_TYPE_SHORT; + else + spec->type = FORMAT_TYPE_USHORT; + } else { + if (sign) + spec->type = FORMAT_TYPE_INT; + else + spec->type = FORMAT_TYPE_UINT; } - return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags); + + return ++fmt - start; } /** @@ -726,18 +961,9 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) { unsigned long long num; - int base; char *str, *end, c; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. */ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - /* 't' added for ptrdiff_t */ + int read; + struct printf_spec spec = {0}; /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. */ @@ -758,184 +984,144 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) size = end - buf; } - for (; *fmt ; ++fmt) { - if (*fmt != '%') { - if (str < end) - *str = *fmt; - ++str; - continue; - } + while (*fmt) { + const char *old_fmt = fmt; - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= LEFT; goto repeat; - case '+': flags |= PLUS; goto repeat; - case ' ': flags |= SPACE; goto repeat; - case '#': flags |= SPECIAL; goto repeat; - case '0': flags |= ZEROPAD; goto repeat; - } + read = format_decode(fmt, &spec); - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= LEFT; - } - } + fmt += read; - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); + switch (spec.type) { + case FORMAT_TYPE_NONE: { + int copy = read; + if (str < end) { + if (copy > end - str) + copy = end - str; + memcpy(str, old_fmt, copy); } - if (precision < 0) - precision = 0; + str += read; + break; } - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || - *fmt =='Z' || *fmt == 'z' || *fmt == 't') { - qualifier = *fmt; - ++fmt; - if (qualifier == 'l' && *fmt == 'l') { - qualifier = 'L'; - ++fmt; - } - } + case FORMAT_TYPE_WITDH: + spec.field_width = va_arg(args, int); + break; - /* default base */ - base = 10; + case FORMAT_TYPE_PRECISION: + spec.precision = va_arg(args, int); + break; - switch (*fmt) { - case 'c': - if (!(flags & LEFT)) { - while (--field_width > 0) { - if (str < end) - *str = ' '; - ++str; - } - } - c = (unsigned char) va_arg(args, int); - if (str < end) - *str = c; - ++str; - while (--field_width > 0) { + case FORMAT_TYPE_CHAR: + if (!(spec.flags & LEFT)) { + while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; - } - continue; - - case 's': - str = string(str, end, va_arg(args, char *), field_width, precision, flags); - continue; - - case 'p': - str = pointer(fmt+1, str, end, - va_arg(args, void *), - field_width, precision, flags); - /* Skip all alphanumeric pointer suffixes */ - while (isalnum(fmt[1])) - fmt++; - continue; - - case 'n': - /* FIXME: - * What does C99 say about the overflow case here? */ - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z' || qualifier == 'z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - case '%': + } + } + c = (unsigned char) va_arg(args, int); + if (str < end) + *str = c; + ++str; + while (--spec.field_width > 0) { if (str < end) - *str = '%'; + *str = ' '; ++str; - continue; + } + break; - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; + case FORMAT_TYPE_STR: + str = string(str, end, va_arg(args, char *), spec); + break; - case 'x': - flags |= SMALL; - case 'X': - base = 16; - break; + case FORMAT_TYPE_PTR: + str = pointer(fmt+1, str, end, va_arg(args, void *), + spec); + while (isalnum(*fmt)) + fmt++; + break; - case 'd': - case 'i': - flags |= SIGN; - case 'u': - break; + case FORMAT_TYPE_PERCENT_CHAR: + if (str < end) + *str = '%'; + ++str; + break; - default: + case FORMAT_TYPE_INVALID: + if (str < end) + *str = '%'; + ++str; + if (*fmt) { if (str < end) - *str = '%'; + *str = *fmt; ++str; - if (*fmt) { - if (str < end) - *str = *fmt; - ++str; - } else { - --fmt; - } - continue; + } else { + --fmt; + } + break; + + case FORMAT_TYPE_NRCHARS: { + int qualifier = spec.qualifier; + + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z' || + qualifier == 'z') { + size_t *ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + break; } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & SIGN) - num = (signed long) num; - } else if (qualifier == 'Z' || qualifier == 'z') { - num = va_arg(args, size_t); - } else if (qualifier == 't') { - num = va_arg(args, ptrdiff_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & SIGN) - num = (signed int) num; + + default: + switch (spec.type) { + case FORMAT_TYPE_LONG_LONG: + num = va_arg(args, long long); + break; + case FORMAT_TYPE_ULONG: + num = va_arg(args, unsigned long); + break; + case FORMAT_TYPE_LONG: + num = va_arg(args, long); + break; + case FORMAT_TYPE_SIZE_T: + num = va_arg(args, size_t); + break; + case FORMAT_TYPE_PTRDIFF: + num = va_arg(args, ptrdiff_t); + break; + case FORMAT_TYPE_USHORT: + num = (unsigned short) va_arg(args, int); + break; + case FORMAT_TYPE_SHORT: + num = (short) va_arg(args, int); + break; + case FORMAT_TYPE_UINT: + num = va_arg(args, unsigned int); + break; + default: + num = va_arg(args, unsigned int); + } + + str = number(str, end, num, spec); } - str = number(str, end, num, base, - field_width, precision, flags); } + if (size > 0) { if (str < end) *str = '\0'; else end[-1] = '\0'; } + /* the trailing null byte doesn't count towards the total */ return str-buf; + } EXPORT_SYMBOL(vsnprintf); @@ -1084,8 +1270,9 @@ EXPORT_SYMBOL(sprintf); */ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) { + struct printf_spec spec = {0}; char *str, *end; - int qualifier; + int read; str = (char *)bin_buf; end = (char *)(bin_buf + size); @@ -1110,57 +1297,26 @@ do { \ str += sizeof(type); \ } while (0) - for (; *fmt ; ++fmt) { - if (*fmt != '%') - continue; -repeat: - /* parse flags */ - ++fmt; /* this also skips first '%' */ - if (*fmt == '-' || *fmt == '+' || *fmt == ' ' - || *fmt == '#' || *fmt == '0') - goto repeat; + while (*fmt) { + read = format_decode(fmt, &spec); - /* parse field width */ - if (isdigit(*fmt)) - skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - save_arg(int); - } + fmt += read; - /* parse the precision */ - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - save_arg(int); - } - } + switch (spec.type) { + case FORMAT_TYPE_NONE: + break; - /* parse the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || - *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { - qualifier = *fmt; - ++fmt; - if (qualifier == 'l' && *fmt == 'l') { - qualifier = 'L'; - ++fmt; - } - } + case FORMAT_TYPE_WITDH: + case FORMAT_TYPE_PRECISION: + save_arg(int); + break; - /* parse format type */ - switch (*fmt) { - case 'c': + case FORMAT_TYPE_CHAR: save_arg(char); - continue; - case 's': { - /* save the string argument */ + break; + + case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); size_t len; if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE @@ -1170,16 +1326,27 @@ repeat: if (str + len + 1 < end) memcpy(str, save_str, len + 1); str += len + 1; - continue; + break; } - case 'p': + + case FORMAT_TYPE_PTR: save_arg(void *); /* skip all alphanumeric pointer suffixes */ - while (isalnum(fmt[1])) + while (isalnum(*fmt)) fmt++; - continue; - case 'n': { + break; + + case FORMAT_TYPE_PERCENT_CHAR: + break; + + case FORMAT_TYPE_INVALID: + if (!*fmt) + --fmt; + break; + + case FORMAT_TYPE_NRCHARS: { /* skip %n 's argument */ + int qualifier = spec.qualifier; void *skip_arg; if (qualifier == 'l') skip_arg = va_arg(args, long *); @@ -1187,37 +1354,37 @@ repeat: skip_arg = va_arg(args, size_t *); else skip_arg = va_arg(args, int *); - continue; + break; } - case 'o': - case 'x': - case 'X': - case 'd': - case 'i': - case 'u': - /* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */ - if (qualifier == 'L') + + default: + switch (spec.type) { + + case FORMAT_TYPE_LONG_LONG: save_arg(long long); - else if (qualifier == 'l') + break; + case FORMAT_TYPE_ULONG: + case FORMAT_TYPE_LONG: save_arg(unsigned long); - else if (qualifier == 'Z' || qualifier == 'z') + break; + case FORMAT_TYPE_SIZE_T: save_arg(size_t); - else if (qualifier == 't') + break; + case FORMAT_TYPE_PTRDIFF: save_arg(ptrdiff_t); - else if (qualifier == 'h') + break; + case FORMAT_TYPE_USHORT: + case FORMAT_TYPE_SHORT: save_arg(short); - else + break; + default: save_arg(int); - continue; - default: - if (!*fmt) - --fmt; - continue; + } } } -#undef save_arg - return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; + +#undef save_arg } EXPORT_SYMBOL_GPL(vbin_printf); @@ -1249,14 +1416,10 @@ EXPORT_SYMBOL_GPL(vbin_printf); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) { unsigned long long num; - int base; char *str, *end, c; const char *args = (const char *)bin_buf; - int flags; - int field_width; - int precision; - int qualifier; + struct printf_spec spec = {0}; if (unlikely((int) size < 0)) { /* There can be only one.. */ @@ -1290,84 +1453,37 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) size = end - buf; } - for (; *fmt ; ++fmt) { - if (*fmt != '%') { - if (str < end) - *str = *fmt; - ++str; - continue; - } + while (*fmt) { + int read; + const char *old_fmt = fmt; - /* process flags */ - flags = 0; -repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': - flags |= LEFT; - goto repeat; - case '+': - flags |= PLUS; - goto repeat; - case ' ': - flags |= SPACE; - goto repeat; - case '#': - flags |= SPECIAL; - goto repeat; - case '0': - flags |= ZEROPAD; - goto repeat; - } + read = format_decode(fmt, &spec); - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = get_arg(int); - if (field_width < 0) { - field_width = -field_width; - flags |= LEFT; - } - } + fmt += read; - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = get_arg(int); + switch (spec.type) { + case FORMAT_TYPE_NONE: { + int copy = read; + if (str < end) { + if (copy > end - str) + copy = end - str; + memcpy(str, old_fmt, copy); } - if (precision < 0) - precision = 0; + str += read; + break; } - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || - *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { - qualifier = *fmt; - ++fmt; - if (qualifier == 'l' && *fmt == 'l') { - qualifier = 'L'; - ++fmt; - } - } + case FORMAT_TYPE_WITDH: + spec.field_width = get_arg(int); + break; - /* default base */ - base = 10; + case FORMAT_TYPE_PRECISION: + spec.precision = get_arg(int); + break; - switch (*fmt) { - case 'c': - if (!(flags & LEFT)) { - while (--field_width > 0) { + case FORMAT_TYPE_CHAR: + if (!(spec.flags & LEFT)) { + while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; @@ -1377,58 +1493,34 @@ repeat: if (str < end) *str = c; ++str; - while (--field_width > 0) { + while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; } - continue; + break; - case 's':{ + case FORMAT_TYPE_STR: { const char *str_arg = args; size_t len = strlen(str_arg); args += len + 1; - str = string(str, end, (char *)str_arg, field_width, - precision, flags); - continue; + str = string(str, end, (char *)str_arg, spec); + break; } - case 'p': - str = pointer(fmt+1, str, end, get_arg(void *), - field_width, precision, flags); - /* Skip all alphanumeric pointer suffixes */ - while (isalnum(fmt[1])) + case FORMAT_TYPE_PTR: + str = pointer(fmt+1, str, end, get_arg(void *), spec); + while (isalnum(*fmt)) fmt++; - continue; - - case 'n': - /* skip %n */ - continue; + break; - case '%': + case FORMAT_TYPE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'x': - flags |= SMALL; - case 'X': - base = 16; break; - case 'd': - case 'i': - flags |= SIGN; - case 'u': - break; - - default: + case FORMAT_TYPE_INVALID: if (str < end) *str = '%'; ++str; @@ -1439,36 +1531,54 @@ repeat: } else { --fmt; } - continue; - } - if (qualifier == 'L') - num = get_arg(long long); - else if (qualifier == 'l') { - num = get_arg(unsigned long); - if (flags & SIGN) - num = (signed long) num; - } else if (qualifier == 'Z' || qualifier == 'z') { - num = get_arg(size_t); - } else if (qualifier == 't') { - num = get_arg(ptrdiff_t); - } else if (qualifier == 'h') { - num = (unsigned short) get_arg(short); - if (flags & SIGN) - num = (signed short) num; - } else { - num = get_arg(unsigned int); - if (flags & SIGN) - num = (signed int) num; + break; + + case FORMAT_TYPE_NRCHARS: + /* skip */ + break; + + default: + switch (spec.type) { + + case FORMAT_TYPE_LONG_LONG: + num = get_arg(long long); + break; + case FORMAT_TYPE_ULONG: + num = get_arg(unsigned long); + break; + case FORMAT_TYPE_LONG: + num = get_arg(unsigned long); + break; + case FORMAT_TYPE_SIZE_T: + num = get_arg(size_t); + break; + case FORMAT_TYPE_PTRDIFF: + num = get_arg(ptrdiff_t); + break; + case FORMAT_TYPE_USHORT: + num = get_arg(unsigned short); + break; + case FORMAT_TYPE_SHORT: + num = get_arg(short); + break; + case FORMAT_TYPE_UINT: + num = get_arg(unsigned int); + break; + default: + num = get_arg(int); + } + + str = number(str, end, num, spec); } - str = number(str, end, num, base, - field_width, precision, flags); } + if (size > 0) { if (str < end) *str = '\0'; else end[-1] = '\0'; } + #undef get_arg /* the trailing null byte doesn't count towards the total */ -- cgit v1.2.3-59-g8ed1b From 39e874f8afbdb3745e2406ce4ecbde9ac4cbaa78 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 9 Mar 2009 21:15:04 +0100 Subject: vsprintf: fix bug in negative value printing Sitsofe Wheeler found and bisected that while unifying the vsprintf format decoding in: fef20d9: vsprintf: unify the format decoding layer for its 3 users The sign flag has been dropped out in favour of precise types (ie: LONG/ULONG). But the format helper number() still needs this flag to keep track of the signedness unless it will consider all numbers as unsigned. Also add an explicit cast to int (for %d) while parsing with va_arg() to ensure the highest bit is well extended on the 64 bits number that hosts the value in case of negative values. Reported-Bisected-Tested-by: Sitsofe Wheeler Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Linus Torvalds LKML-Reference: <20090309201503.GA5010@nowhere> Signed-off-by: Ingo Molnar --- lib/vsprintf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 25f01578c856..dc1674377009 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -768,7 +768,6 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, static int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; - bool sign = false; /* we finished early by reading the field width */ if (spec->type == FORMAT_TYPE_WITDH) { @@ -900,7 +899,7 @@ qualifier: case 'd': case 'i': - sign = true; + spec->flags |= SIGN; case 'u': break; @@ -912,7 +911,7 @@ qualifier: if (spec->qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; else if (spec->qualifier == 'l') { - if (sign) + if (spec->flags & SIGN) spec->type = FORMAT_TYPE_LONG; else spec->type = FORMAT_TYPE_ULONG; @@ -921,12 +920,12 @@ qualifier: } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; } else if (spec->qualifier == 'h') { - if (sign) + if (spec->flags & SIGN) spec->type = FORMAT_TYPE_SHORT; else spec->type = FORMAT_TYPE_USHORT; } else { - if (sign) + if (spec->flags & SIGN) spec->type = FORMAT_TYPE_INT; else spec->type = FORMAT_TYPE_UINT; @@ -1101,8 +1100,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_SHORT: num = (short) va_arg(args, int); break; - case FORMAT_TYPE_UINT: - num = va_arg(args, unsigned int); + case FORMAT_TYPE_INT: + num = (int) va_arg(args, int); break; default: num = va_arg(args, unsigned int); -- cgit v1.2.3-59-g8ed1b From 1b23336ad98b3666c216617227c7767cd60a22be Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Mar 2009 12:55:52 -0700 Subject: idr: make idr_remove_all() do removal -before- free_layer() Fix a problem in the IDR system, where an idr_remove_all() hands a data element to call_rcu() (via free_layer()) before making that data element inaccessible to new readers. This is very bad, and results in readers still having a reference to this data element at the end of the grace period. Tests on large machines that concurrently map and unmap user-space memory within the same multithreaded process result in crashes within about five minutes. Applying this patch increases the kernel's longevity to the three-to-eight-hour range. There appear to be other similar problems in idr_get_empty_slot() and sub_remove(), but I fixed the easy one in idr_remove_all() first. It is therefore no surprise that failures still occur. Located-by: Milton Miller II Tested-by: Milton Miller II Signed-off-by: Paul E. McKenney Cc: Manfred Spraul Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index c11c5765cdef..dab4bca86f5d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -449,6 +449,7 @@ void idr_remove_all(struct idr *idp) n = idp->layers * IDR_BITS; p = idp->top; + rcu_assign_pointer(idp->top, NULL); max = 1 << n; id = 0; @@ -467,7 +468,6 @@ void idr_remove_all(struct idr *idp) p = *--paa; } } - rcu_assign_pointer(idp->top, NULL); idp->layers = 0; } EXPORT_SYMBOL(idr_remove_all); -- cgit v1.2.3-59-g8ed1b From 908002161247e6e68c478052926b62d9a3d72418 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 11 Mar 2009 23:18:32 +0800 Subject: nlattr: Fix build error with NET off We moved the netlink attribute support from net to lib in order for it to be available for general consumption. However, parts of the code (the bits that we don't need :) really depends on NET because the target object is sk_buff. This patch fixes this by wrapping them in CONFIG_NET. Some EXPORTs have been moved to make this work. Tested-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- lib/nlattr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index 56c3ce7fe29a..80009a24e21d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -281,6 +281,7 @@ int nla_strcmp(const struct nlattr *nla, const char *str) return d; } +#ifdef CONFIG_NET /** * __nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on @@ -305,6 +306,7 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return nla; } +EXPORT_SYMBOL(__nla_reserve); /** * __nla_reserve_nohdr - reserve room for attribute without header @@ -325,6 +327,7 @@ void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) return start; } +EXPORT_SYMBOL(__nla_reserve_nohdr); /** * nla_reserve - reserve room for attribute on the skb @@ -345,6 +348,7 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return __nla_reserve(skb, attrtype, attrlen); } +EXPORT_SYMBOL(nla_reserve); /** * nla_reserve_nohdr - reserve room for attribute without header @@ -363,6 +367,7 @@ void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) return __nla_reserve_nohdr(skb, attrlen); } +EXPORT_SYMBOL(nla_reserve_nohdr); /** * __nla_put - Add a netlink attribute to a socket buffer @@ -382,6 +387,7 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, nla = __nla_reserve(skb, attrtype, attrlen); memcpy(nla_data(nla), data, attrlen); } +EXPORT_SYMBOL(__nla_put); /** * __nla_put_nohdr - Add a netlink attribute without header @@ -399,6 +405,7 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) start = __nla_reserve_nohdr(skb, attrlen); memcpy(start, data, attrlen); } +EXPORT_SYMBOL(__nla_put_nohdr); /** * nla_put - Add a netlink attribute to a socket buffer @@ -418,6 +425,7 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) __nla_put(skb, attrtype, attrlen, data); return 0; } +EXPORT_SYMBOL(nla_put); /** * nla_put_nohdr - Add a netlink attribute without header @@ -436,6 +444,7 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) __nla_put_nohdr(skb, attrlen, data); return 0; } +EXPORT_SYMBOL(nla_put_nohdr); /** * nla_append - Add a netlink attribute without header or padding @@ -454,20 +463,13 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data) memcpy(skb_put(skb, attrlen), data, attrlen); return 0; } +EXPORT_SYMBOL(nla_append); +#endif EXPORT_SYMBOL(nla_validate); EXPORT_SYMBOL(nla_parse); EXPORT_SYMBOL(nla_find); EXPORT_SYMBOL(nla_strlcpy); -EXPORT_SYMBOL(__nla_reserve); -EXPORT_SYMBOL(__nla_reserve_nohdr); -EXPORT_SYMBOL(nla_reserve); -EXPORT_SYMBOL(nla_reserve_nohdr); -EXPORT_SYMBOL(__nla_put); -EXPORT_SYMBOL(__nla_put_nohdr); -EXPORT_SYMBOL(nla_put); -EXPORT_SYMBOL(nla_put_nohdr); EXPORT_SYMBOL(nla_memcpy); EXPORT_SYMBOL(nla_memcmp); EXPORT_SYMBOL(nla_strcmp); -EXPORT_SYMBOL(nla_append); -- cgit v1.2.3-59-g8ed1b From d820ac4c2fa881079e6b689d2098adce337558ae Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Mar 2009 01:30:40 +0100 Subject: locking: rename trace_softirq_[enter|exit] => lockdep_softirq_[enter|exit] Impact: cleanup The naming clashes with upcoming softirq tracepoints, so rename the APIs to lockdep_*(). Requested-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 8 ++++---- kernel/softirq.c | 4 ++-- lib/locking-selftest.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 74bde13224c9..b02a3f1d46a0 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -24,8 +24,8 @@ # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) -# define trace_softirq_enter() do { current->softirq_context++; } while (0) -# define trace_softirq_exit() do { current->softirq_context--; } while (0) +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) @@ -38,8 +38,8 @@ # define trace_softirqs_enabled(p) 0 # define trace_hardirq_enter() do { } while (0) # define trace_hardirq_exit() do { } while (0) -# define trace_softirq_enter() do { } while (0) -# define trace_softirq_exit() do { } while (0) +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) # define INIT_TRACE_IRQFLAGS #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 9041ea7948fe..08a030f85416 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -180,7 +180,7 @@ asmlinkage void __do_softirq(void) account_system_vtime(current); __local_bh_disable((unsigned long)__builtin_return_address(0)); - trace_softirq_enter(); + lockdep_softirq_enter(); cpu = smp_processor_id(); restart: @@ -220,7 +220,7 @@ restart: if (pending) wakeup_softirqd(); - trace_softirq_exit(); + lockdep_softirq_exit(); account_system_vtime(current); _local_bh_enable(); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 280332c1827c..619313ed6c46 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -157,11 +157,11 @@ static void init_shared_classes(void) #define SOFTIRQ_ENTER() \ local_bh_disable(); \ local_irq_disable(); \ - trace_softirq_enter(); \ + lockdep_softirq_enter(); \ WARN_ON(!in_softirq()); #define SOFTIRQ_EXIT() \ - trace_softirq_exit(); \ + lockdep_softirq_exit(); \ local_irq_enable(); \ local_bh_enable(); -- cgit v1.2.3-59-g8ed1b From aa8e4fc68d8024cd3132035d13c3cefa7baeac8f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 Mar 2009 19:32:51 -0700 Subject: bitmap: fix end condition in bitmap_find_free_region Guennadi Liakhovetski noticed that the end condition for the loop in bitmap_find_free_region() is wrong, and the "return if error" was also using the wrong conditional that would only trigger if the bitmap was an exact multiple of the allocation size, which is not necessarily the case with dma_alloc_from_coherent(). Such a failure would end up in bitmap_find_free_region() accessing beyond the end of the bitmap. Reported-by: Guennadi Liakhovetski Cc: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 1338469ac849..35a1f7ff4149 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -948,15 +948,15 @@ done: */ int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) { - int pos; /* scans bitmap by regions of size order */ + int pos, end; /* scans bitmap by regions of size order */ - for (pos = 0; pos < bits; pos += (1 << order)) - if (__reg_op(bitmap, pos, order, REG_OP_ISFREE)) - break; - if (pos == bits) - return -ENOMEM; - __reg_op(bitmap, pos, order, REG_OP_ALLOC); - return pos; + for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { + if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) + continue; + __reg_op(bitmap, pos, order, REG_OP_ALLOC); + return pos; + } + return -ENOMEM; } EXPORT_SYMBOL(bitmap_find_free_region); -- cgit v1.2.3-59-g8ed1b From adf26f84a62b492e002d3b75af671f23ddd3be0a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 14 Mar 2009 12:08:50 +0100 Subject: fix regression from "vsprintf: unify the format decoding layer for its 3 users" Jeremy Fitzhardinge reported: > Change fef20d9c1380f04ba9492d6463148db07b413708, "vsprintf: > unify the format decoding layer for its 3 users", causes a > regression in xenbus which results in no devices getting > attached to a new domain. %.*s is broken - fix it. Reported-by: Jeremy Fitzhardinge Cc: Frederic Weisbecker Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index dc1674377009..708e505ce81f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -843,7 +843,7 @@ precision: spec->precision = 0; } else if (*fmt == '*') { /* it's the next argument */ - spec->type = FORMAT_TYPE_WITDH; + spec->type = FORMAT_TYPE_PRECISION; return ++fmt - start; } } -- cgit v1.2.3-59-g8ed1b From ed681a91ab805341675d166a9592551093c0a2d9 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 14 Mar 2009 12:08:50 +0100 Subject: vsprintf: unify the format decoding layer for its 3 users, cleanup Impact: cleanup Rename FORMAT_TYPE_WITDH to => FORMAT_TYPE_WIDTH Cc: Frederic Weisbecker Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- lib/vsprintf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 708e505ce81f..be3001f912e4 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -398,7 +398,7 @@ static noinline char* put_dec(char *buf, unsigned long long num) enum format_type { FORMAT_TYPE_NONE, /* Just a string part */ - FORMAT_TYPE_WITDH, + FORMAT_TYPE_WIDTH, FORMAT_TYPE_PRECISION, FORMAT_TYPE_CHAR, FORMAT_TYPE_STR, @@ -770,7 +770,7 @@ static int format_decode(const char *fmt, struct printf_spec *spec) const char *start = fmt; /* we finished early by reading the field width */ - if (spec->type == FORMAT_TYPE_WITDH) { + if (spec->type == FORMAT_TYPE_WIDTH) { if (spec->field_width < 0) { spec->field_width = -spec->field_width; spec->flags |= LEFT; @@ -828,7 +828,7 @@ static int format_decode(const char *fmt, struct printf_spec *spec) spec->field_width = skip_atoi(&fmt); else if (*fmt == '*') { /* it's the next argument */ - spec->type = FORMAT_TYPE_WITDH; + spec->type = FORMAT_TYPE_WIDTH; return ++fmt - start; } @@ -1002,7 +1002,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) break; } - case FORMAT_TYPE_WITDH: + case FORMAT_TYPE_WIDTH: spec.field_width = va_arg(args, int); break; @@ -1306,7 +1306,7 @@ do { \ case FORMAT_TYPE_NONE: break; - case FORMAT_TYPE_WITDH: + case FORMAT_TYPE_WIDTH: case FORMAT_TYPE_PRECISION: save_arg(int); break; @@ -1472,7 +1472,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; } - case FORMAT_TYPE_WITDH: + case FORMAT_TYPE_WIDTH: spec.field_width = get_arg(int); break; -- cgit v1.2.3-59-g8ed1b From 1be1cb7b47f0744141ed61cdb25648819ae1a56f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Mar 2009 18:53:18 +0100 Subject: debugobjects: replace static objects when slab cache becomes available Impact: refactor/consolidate object management, prepare for delayed free debugobjects allocates static reference objects to track objects which are initialized or activated before the slab cache becomes available. These static reference objects have to be handled seperately in free_object(). The handling of these objects is in the way of implementing a delayed free functionality. The delayed free is required to avoid callbacks into the mm code from debug_check_no_obj_freed(). Replace the static object references with dynamic ones after the slab cache has been initialized. The static objects are now marked initdata. Signed-off-by: Thomas Gleixner LKML-Reference: <200903162049.58058.nickpiggin@yahoo.com.au> --- lib/debugobjects.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 90e46fa12721..fdcda3dbcd35 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -30,7 +30,7 @@ struct debug_bucket { static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; -static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE]; +static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_SPINLOCK(pool_lock); @@ -883,6 +883,63 @@ void __init debug_objects_early_init(void) hlist_add_head(&obj_static_pool[i].node, &obj_pool); } +/* + * Convert the statically allocated objects to dynamic ones: + */ +static int debug_objects_replace_static_objects(void) +{ + struct debug_bucket *db = obj_hash; + struct hlist_node *node, *tmp; + struct debug_obj *obj, *new; + HLIST_HEAD(objects); + int i, cnt = 0; + + for (i = 0; i < ODEBUG_POOL_SIZE; i++) { + obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); + if (!obj) + goto free; + hlist_add_head(&obj->node, &objects); + } + + /* + * When debug_objects_mem_init() is called we know that only + * one CPU is up, so disabling interrupts is enough + * protection. This avoids the lockdep hell of lock ordering. + */ + local_irq_disable(); + + /* Remove the statically allocated objects from the pool */ + hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node) + hlist_del(&obj->node); + /* Move the allocated objects to the pool */ + hlist_move_list(&objects, &obj_pool); + + /* Replace the active object references */ + for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + hlist_move_list(&db->list, &objects); + + hlist_for_each_entry(obj, node, &objects, node) { + new = hlist_entry(obj_pool.first, typeof(*obj), node); + hlist_del(&new->node); + /* copy object data */ + *new = *obj; + hlist_add_head(&new->node, &db->list); + cnt++; + } + } + + printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, + obj_pool_used); + local_irq_enable(); + return 0; +free: + hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + } + return -ENOMEM; +} + /* * Called after the kmem_caches are functional to setup a dedicated * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag @@ -898,8 +955,11 @@ void __init debug_objects_mem_init(void) sizeof (struct debug_obj), 0, SLAB_DEBUG_OBJECTS, NULL); - if (!obj_cache) + if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; - else + if (obj_cache) + kmem_cache_destroy(obj_cache); + printk(KERN_WARNING "ODEBUG: out of memory.\n"); + } else debug_objects_selftest(); } -- cgit v1.2.3-59-g8ed1b From 337fff8b5ed0573ea106491c6de47bd7fe623500 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Mar 2009 10:04:53 +0100 Subject: debugobjects: delay free of internal objects Impact: avoid recursive kfree calls, less slab activity on heavy load debugobjects checks on kfree whether tracked objects are freed. When a tracked object is freed debugobjects frees the internal reference object as well. The debug object slab cache is marked to not recurse into debugobjects when a slab objects is freed, but the recursive call can be problematic versus locking in the memory allocator. Defer the freeing of debug slab objects via schedule_work. The reasons not to use RCU are: 1) rcu makes the data structure larger 2) there is no real need for rcu as nothing references the obj after we freed it 3) under heavy load it is easier to reuse the to be freed objects instead of allocating new objects from the slab. This lowered the slab activity significantly in a heavy load networking test where lots of timers are created/destroyed. The workqueue based delayed free allows us just to put the to be freed objects back into the object pool and reuse them right away. Signed-off-by: Thomas Gleixner LKML-Reference: <200903162049.58058.nickpiggin@yahoo.com.au> --- lib/debugobjects.c | 53 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index fdcda3dbcd35..2755a3bd16a1 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -50,6 +50,9 @@ static int debug_objects_enabled __read_mostly static struct debug_obj_descr *descr_test __read_mostly; +static void free_obj_work(struct work_struct *work); +static DECLARE_WORK(debug_obj_work, free_obj_work); + static int __init enable_object_debug(char *str) { debug_objects_enabled = 1; @@ -154,25 +157,51 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) } /* - * Put the object back into the pool or give it back to kmem_cache: + * workqueue function to free objects. */ -static void free_object(struct debug_obj *obj) +static void free_obj_work(struct work_struct *work) { - unsigned long idx = (unsigned long)(obj - obj_static_pool); + struct debug_obj *obj; unsigned long flags; - if (obj_pool_free < ODEBUG_POOL_SIZE || idx < ODEBUG_POOL_SIZE) { - spin_lock_irqsave(&pool_lock, flags); - hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; - obj_pool_used--; - spin_unlock_irqrestore(&pool_lock, flags); - } else { - spin_lock_irqsave(&pool_lock, flags); - obj_pool_used--; + spin_lock_irqsave(&pool_lock, flags); + while (obj_pool_free > ODEBUG_POOL_SIZE) { + obj = hlist_entry(obj_pool.first, typeof(*obj), node); + hlist_del(&obj->node); + obj_pool_free--; + /* + * We release pool_lock across kmem_cache_free() to + * avoid contention on pool_lock. + */ spin_unlock_irqrestore(&pool_lock, flags); kmem_cache_free(obj_cache, obj); + spin_lock_irqsave(&pool_lock, flags); } + spin_unlock_irqrestore(&pool_lock, flags); +} + +/* + * Put the object back into the pool and schedule work to free objects + * if necessary. + */ +static void free_object(struct debug_obj *obj) +{ + unsigned long flags; + int sched = 0; + + spin_lock_irqsave(&pool_lock, flags); + /* + * schedule work when the pool is filled and the cache is + * initialized: + */ + if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) + sched = !work_pending(&debug_obj_work); + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + obj_pool_used--; + spin_unlock_irqrestore(&pool_lock, flags); + if (sched) + schedule_work(&debug_obj_work); } /* -- cgit v1.2.3-59-g8ed1b From ac26c18bd35d982d1ba06020a992b1085fefc3e2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 12 Feb 2009 16:19:13 +0100 Subject: dma-debug: add function to dump dma mappings This adds a function to dump the DMA mappings that the debugging code is aware of -- either for a single device, or for _all_ devices. This can be useful for debugging -- sticking a call to it in the DMA page fault handler, for example, to see if the faulting address _should_ be mapped or not, and hence work out whether it's IOMMU bugs we're seeing, or driver bugs. Signed-off-by: David Woodhouse --- include/linux/dma-debug.h | 6 ++++++ lib/dma-debug.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 4985c6c5237e..46a11c10da04 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -76,6 +76,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction); +extern void debug_dma_dump_mappings(struct device *dev); + #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_init(u32 num_entries) @@ -156,6 +158,10 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev, { } +static inline void debug_dma_dump_mappings(struct device *dev) +{ +} + #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 9d11e89c2ee2..91ed1dfdbaac 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -193,6 +193,36 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } +/* + * Dump mapping entries for debugging purposes + */ +void debug_dma_dump_mappings(struct device *dev) +{ + int idx; + + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + + list_for_each_entry(entry, &bucket->list, list) { + if (!dev || dev == entry->dev) { + dev_info(entry->dev, + "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", + type2name[entry->type], idx, + (unsigned long long)entry->paddr, + entry->dev_addr, entry->size, + dir2name[entry->direction]); + } + } + + spin_unlock_irqrestore(&bucket->lock, flags); + } +} +EXPORT_SYMBOL(debug_dma_dump_mappings); + /* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. -- cgit v1.2.3-59-g8ed1b From 6c132d1bcdc716e898b4092bb1abc696505c37e7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Jan 2009 16:52:39 +0100 Subject: dma-debug: print stacktrace of mapping path on unmap error Impact: saves stacktrace of a dma mapping and prints it if there is an error Signed-off-by: David Woodhouse Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 52 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 91ed1dfdbaac..dba02f138bd3 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,8 @@ enum { dma_debug_coherent, }; +#define DMA_DEBUG_STACKTRACE_ENTRIES 5 + struct dma_debug_entry { struct list_head list; struct device *dev; @@ -49,6 +52,10 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; +#ifdef CONFIG_STACKTRACE + struct stack_trace stacktrace; + unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; +#endif }; struct hash_bucket { @@ -108,12 +115,23 @@ static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", * system log than the user configured. This variable is * writeable via debugfs. */ -#define err_printk(dev, format, arg...) do { \ +static inline void dump_entry_trace(struct dma_debug_entry *entry) +{ +#ifdef CONFIG_STACKTRACE + if (entry) { + printk(KERN_WARNING "Mapped at:\n"); + print_stack_trace(&entry->stacktrace, 0); + } +#endif +} + +#define err_printk(dev, entry, format, arg...) do { \ error_count += 1; \ if (show_all_errors || show_num_errors > 0) { \ WARN(1, "%s %s: " format, \ dev_driver_string(dev), \ dev_name(dev) , ## arg); \ + dump_entry_trace(entry); \ } \ if (!show_all_errors && show_num_errors > 0) \ show_num_errors -= 1; \ @@ -260,6 +278,12 @@ static struct dma_debug_entry *dma_entry_alloc(void) list_del(&entry->list); memset(entry, 0, sizeof(*entry)); +#ifdef CONFIG_STACKTRACE + entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; + entry->stacktrace.entries = entry->st_entries; + entry->stacktrace.skip = 2; + save_stack_trace(&entry->stacktrace); +#endif num_free_entries -= 1; if (num_free_entries < min_free_entries) min_free_entries = num_free_entries; @@ -457,7 +481,7 @@ static void check_unmap(struct dma_debug_entry *ref) entry = hash_bucket_find(bucket, ref); if (!entry) { - err_printk(ref->dev, "DMA-API: device driver tries " + err_printk(ref->dev, NULL, "DMA-API: device driver tries " "to free DMA memory it has not allocated " "[device address=0x%016llx] [size=%llu bytes]\n", ref->dev_addr, ref->size); @@ -465,7 +489,7 @@ static void check_unmap(struct dma_debug_entry *ref) } if (ref->size != entry->size) { - err_printk(ref->dev, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different size " "[device address=0x%016llx] [map size=%llu bytes] " "[unmap size=%llu bytes]\n", @@ -473,7 +497,7 @@ static void check_unmap(struct dma_debug_entry *ref) } if (ref->type != entry->type) { - err_printk(ref->dev, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with wrong function " "[device address=0x%016llx] [size=%llu bytes] " "[mapped as %s] [unmapped as %s]\n", @@ -481,7 +505,7 @@ static void check_unmap(struct dma_debug_entry *ref) type2name[entry->type], type2name[ref->type]); } else if ((entry->type == dma_debug_coherent) && (ref->paddr != entry->paddr)) { - err_printk(ref->dev, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " "[cpu alloc address=%p] [cpu free address=%p]", @@ -491,7 +515,7 @@ static void check_unmap(struct dma_debug_entry *ref) if (ref->sg_call_ents && ref->type == dma_debug_sg && ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA sg list with different entry count " "[map count=%d] [unmap count=%d]\n", entry->sg_call_ents, ref->sg_call_ents); @@ -502,7 +526,7 @@ static void check_unmap(struct dma_debug_entry *ref) * DMA API don't handle this properly, so check for it here */ if (ref->direction != entry->direction) { - err_printk(ref->dev, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different direction " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [unmapped with %s]\n", @@ -521,8 +545,8 @@ out: static void check_for_stack(struct device *dev, void *addr) { if (object_is_on_stack(addr)) - err_printk(dev, "DMA-API: device driver maps memory from stack" - " [addr=%p]\n", addr); + err_printk(dev, NULL, "DMA-API: device driver maps memory from" + "stack [addr=%p]\n", addr); } static void check_sync(struct device *dev, dma_addr_t addr, @@ -543,7 +567,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, entry = hash_bucket_find(bucket, &ref); if (!entry) { - err_printk(dev, "DMA-API: device driver tries " + err_printk(dev, NULL, "DMA-API: device driver tries " "to sync DMA memory it has not allocated " "[device address=0x%016llx] [size=%llu bytes]\n", addr, size); @@ -551,7 +575,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, } if ((offset + size) > entry->size) { - err_printk(dev, "DMA-API: device driver syncs" + err_printk(dev, entry, "DMA-API: device driver syncs" " DMA memory outside allocated range " "[device address=0x%016llx] " "[allocation size=%llu bytes] [sync offset=%llu] " @@ -560,7 +584,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, } if (direction != entry->direction) { - err_printk(dev, "DMA-API: device driver syncs " + err_printk(dev, entry, "DMA-API: device driver syncs " "DMA memory with different direction " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", @@ -574,7 +598,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && !(direction == DMA_TO_DEVICE)) - err_printk(dev, "DMA-API: device driver syncs " + err_printk(dev, entry, "DMA-API: device driver syncs " "device read-only DMA memory for cpu " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", @@ -584,7 +608,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && !(direction == DMA_FROM_DEVICE)) - err_printk(dev, "DMA-API: device driver syncs " + err_printk(dev, entry, "DMA-API: device driver syncs " "device write-only DMA memory to device " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", -- cgit v1.2.3-59-g8ed1b From 2e34bde18576a02c897ae6b699ea26301d92be1b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 16 Mar 2009 16:51:55 +0100 Subject: dma-debug: add checks for kernel text and rodata Impact: get notified if a device dma maps illegal areas This patch adds a check to print a warning message when a device driver tries to map a memory area from the kernel text segment or rodata. Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index dba02f138bd3..6022eb4a0cd0 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -29,6 +29,8 @@ #include #include +#include + #define HASH_SIZE 1024ULL #define HASH_FN_SHIFT 13 #define HASH_FN_MASK (HASH_SIZE - 1) @@ -549,6 +551,24 @@ static void check_for_stack(struct device *dev, void *addr) "stack [addr=%p]\n", addr); } +static inline bool overlap(void *addr, u64 size, void *start, void *end) +{ + void *addr2 = (char *)addr + size; + + return ((addr >= start && addr < end) || + (addr2 >= start && addr2 < end) || + ((addr < start) && (addr2 >= end))); +} + +static void check_for_illegal_area(struct device *dev, void *addr, u64 size) +{ + if (overlap(addr, size, _text, _etext) || + overlap(addr, size, __start_rodata, __end_rodata)) + err_printk(dev, NULL, "DMA-API: device driver maps " + "memory from kernel text or rodata " + "[addr=%p] [size=%llu]\n", addr, size); +} + static void check_sync(struct device *dev, dma_addr_t addr, u64 size, u64 offset, int direction, bool to_cpu) { @@ -645,8 +665,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->direction = direction; if (map_single) { + void *addr = ((char *)page_address(page)) + offset; + entry->type = dma_debug_single; - check_for_stack(dev, page_address(page) + offset); + check_for_stack(dev, addr); + check_for_illegal_area(dev, addr, size); } add_dma_entry(entry); @@ -699,6 +722,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->sg_mapped_ents = mapped_ents; check_for_stack(dev, sg_virt(s)); + check_for_illegal_area(dev, sg_virt(s), s->length); add_dma_entry(entry); } -- cgit v1.2.3-59-g8ed1b From 41531c8f5f05aba5ec645d9770557eedbf75b422 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 16 Mar 2009 17:32:14 +0100 Subject: dma-debug: add a check dma memory leaks Impact: allow architectures to monitor busses for dma mem leakage This patch adds checking code to detect if a device has pending DMA operations when it is about to be unbound from its device driver. Signed-off-by: Joerg Roedel --- include/linux/dma-debug.h | 7 ++++++ lib/dma-debug.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 46a11c10da04..e851d23e91eb 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -24,9 +24,12 @@ struct device; struct scatterlist; +struct bus_type; #ifdef CONFIG_DMA_API_DEBUG +extern void dma_debug_add_bus(struct bus_type *bus); + extern void dma_debug_init(u32 num_entries); extern void debug_dma_map_page(struct device *dev, struct page *page, @@ -80,6 +83,10 @@ extern void debug_dma_dump_mappings(struct device *dev); #else /* CONFIG_DMA_API_DEBUG */ +void dma_debug_add_bus(struct bus_type *bus) +{ +} + static inline void dma_debug_init(u32 num_entries) { } diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 6022eb4a0cd0..9a350b414a50 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -400,6 +400,61 @@ out_err: return -ENOMEM; } +static int device_dma_allocations(struct device *dev) +{ + struct dma_debug_entry *entry; + unsigned long flags; + int count = 0, i; + + for (i = 0; i < HASH_SIZE; ++i) { + spin_lock_irqsave(&dma_entry_hash[i].lock, flags); + list_for_each_entry(entry, &dma_entry_hash[i].list, list) { + if (entry->dev == dev) + count += 1; + } + spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); + } + + return count; +} + +static int dma_debug_device_change(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + int count; + + + switch (action) { + case BUS_NOTIFY_UNBIND_DRIVER: + count = device_dma_allocations(dev); + if (count == 0) + break; + err_printk(dev, NULL, "DMA-API: device driver has pending " + "DMA allocations while released from device " + "[count=%d]\n", count); + break; + default: + break; + } + + return 0; +} + +void dma_debug_add_bus(struct bus_type *bus) +{ + struct notifier_block *nb; + + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); + if (nb == NULL) { + printk(KERN_ERR "dma_debug_add_bus: out of memory\n"); + return; + } + + nb->notifier_call = dma_debug_device_change; + + bus_register_notifier(bus, nb); +} /* * Let the architectures decide how many entries should be preallocated. -- cgit v1.2.3-59-g8ed1b From 35d40952dba7b0689a16bd1463fb7698f8dbe639 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 19 Mar 2009 10:39:31 +0900 Subject: dma-debug: warn of unmapping an invalid dma address Impact: extend DMA-debug checks Calling dma_unmap families against an invalid dma address should be a bug. Signed-off-by: FUJITA Tomonori Cc: Joerg Roedel LKML-Reference: <20090319103743N.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- lib/dma-debug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 9a350b414a50..f9e6d38b4b34 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -531,8 +531,11 @@ static void check_unmap(struct dma_debug_entry *ref) struct hash_bucket *bucket; unsigned long flags; - if (dma_mapping_error(ref->dev, ref->dev_addr)) + if (dma_mapping_error(ref->dev, ref->dev_addr)) { + err_printk(ref->dev, NULL, "DMA-API: device driver tries " + "to free an invalid DMA memory address\n"); return; + } bucket = get_hash_bucket(ref, &flags); entry = hash_bucket_find(bucket, ref); -- cgit v1.2.3-59-g8ed1b From 9537a48ed4b9e4b738943d6da0a0fd4278adf905 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Mar 2009 15:35:08 +0100 Subject: dma-debug: make memory range checks more consistent Impact: extend on-kernel-stack DMA debug checks to all !highmem pages We only checked dma_map_single() - extend it to dma_map_page() and dma_map_sg() as well. Also, fix dma_map_single() corner case bug: make sure we dont stack-check highmem (not mapped) pages. Reported-by: FUJITA Tomonori Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org LKML-Reference: <1237818908-26516-1-git-send-email-joerg.roedel@amd.com> Signed-off-by: Ingo Molnar --- lib/dma-debug.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index f9e6d38b4b34..1a992089486c 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -722,10 +722,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->size = size; entry->direction = direction; - if (map_single) { - void *addr = ((char *)page_address(page)) + offset; - + if (map_single) entry->type = dma_debug_single; + + if (!PageHighMem(page)) { + void *addr = ((char *)page_address(page)) + offset; check_for_stack(dev, addr); check_for_illegal_area(dev, addr, size); } @@ -779,8 +780,10 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; - check_for_stack(dev, sg_virt(s)); - check_for_illegal_area(dev, sg_virt(s), s->length); + if (!PageHighMem(sg_page(s))) { + check_for_stack(dev, sg_virt(s)); + check_for_illegal_area(dev, sg_virt(s), s->length); + } add_dma_entry(entry); } -- cgit v1.2.3-59-g8ed1b From 1fa5ae857bb14f6046205171d98506d8112dd74e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 25 Jan 2009 15:17:37 +0100 Subject: driver core: get rid of struct device's bus_id string array Now that all users of bus_id is gone, we can remove it from struct device. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 39 +++++++++++++++++++-------------------- include/linux/device.h | 4 +--- include/linux/kobject.h | 2 ++ lib/kobject.c | 2 +- 4 files changed, 23 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/drivers/base/core.c b/drivers/base/core.c index f3eae630e589..059966b617f6 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -777,17 +777,12 @@ static void device_remove_class_symlinks(struct device *dev) int dev_set_name(struct device *dev, const char *fmt, ...) { va_list vargs; - char *s; + int err; va_start(vargs, fmt); - vsnprintf(dev->bus_id, sizeof(dev->bus_id), fmt, vargs); + err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); va_end(vargs); - - /* ewww... some of these buggers have / in the name... */ - while ((s = strchr(dev->bus_id, '/'))) - *s = '!'; - - return 0; + return err; } EXPORT_SYMBOL_GPL(dev_set_name); @@ -864,12 +859,17 @@ int device_add(struct device *dev) if (!dev) goto done; - /* Temporarily support init_name if it is set. - * It will override bus_id for now */ - if (dev->init_name) - dev_set_name(dev, "%s", dev->init_name); + /* + * for statically allocated devices, which should all be converted + * some day, we need to initialize the name. We prevent reading back + * the name, and force the use of dev_name() + */ + if (dev->init_name) { + dev_set_name(dev, dev->init_name); + dev->init_name = NULL; + } - if (!strlen(dev->bus_id)) + if (!dev_name(dev)) goto done; pr_debug("device: '%s': %s\n", dev_name(dev), __func__); @@ -1348,7 +1348,10 @@ struct device *device_create_vargs(struct class *class, struct device *parent, dev->release = device_create_release; dev_set_drvdata(dev, drvdata); - vsnprintf(dev->bus_id, BUS_ID_SIZE, fmt, args); + retval = kobject_set_name_vargs(&dev->kobj, fmt, args); + if (retval) + goto error; + retval = device_register(dev); if (retval) goto error; @@ -1452,19 +1455,15 @@ int device_rename(struct device *dev, char *new_name) old_class_name = make_class_name(dev->class->name, &dev->kobj); #endif - old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); + old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); if (!old_device_name) { error = -ENOMEM; goto out; } - strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE); - strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); error = kobject_rename(&dev->kobj, new_name); - if (error) { - strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE); + if (error) goto out; - } #ifdef CONFIG_SYSFS_DEPRECATED if (old_class_name) { diff --git a/include/linux/device.h b/include/linux/device.h index 47f343c7bdda..d5706c448bcb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -374,7 +374,6 @@ struct device { struct device *parent; struct kobject kobj; - char bus_id[BUS_ID_SIZE]; /* position on parent bus */ unsigned uevent_suppress:1; const char *init_name; /* initial name of the device */ struct device_type *type; @@ -427,8 +426,7 @@ struct device { static inline const char *dev_name(const struct device *dev) { - /* will be changed into kobject_name(&dev->kobj) in the near future */ - return dev->bus_id; + return kobject_name(&dev->kobj); } extern int dev_set_name(struct device *dev, const char *name, ...) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 5437ac0276e2..c9c214d7bba2 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -72,6 +72,8 @@ struct kobject { extern int kobject_set_name(struct kobject *kobj, const char *name, ...) __attribute__((format(printf, 2, 3))); +extern int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, + va_list vargs); static inline const char *kobject_name(const struct kobject *kobj) { diff --git a/lib/kobject.c b/lib/kobject.c index 0487d1f64806..a6dec32f2ddd 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -212,7 +212,7 @@ static int kobject_add_internal(struct kobject *kobj) * @fmt: format string used to build the name * @vargs: vargs to format the string. */ -static int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, +int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs) { const char *old_name = kobj->name; -- cgit v1.2.3-59-g8ed1b From f67f129e519fa87f8ebd236b6336fe43f31ee141 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 1 Mar 2009 21:10:49 +0800 Subject: Driver core: implement uevent suppress in kobject This patch implements uevent suppress in kobject and removes it from struct device, based on the following ideas: 1,Uevent sending should be one attribute of kobject, so suppressing it in kobject layer is more natural than in device layer. By this way, we can do it for other objects embedded with kobject. 2,It may save several bytes for each instance of struct device.(On my omap3(32bit ARM) based box, can save 8bytes per device object) This patch also introduces dev_set|get_uevent_suppress() helpers to set and query uevent_suppress attribute in case to help kobject as private part of struct device in future. [This version is against the latest driver-core patch set of Greg,please ignore the last version.] Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/dock.c | 2 +- drivers/base/core.c | 2 -- drivers/base/firmware_class.c | 4 ++-- drivers/i2c/i2c-core.c | 2 +- drivers/s390/cio/chsc_sch.c | 4 ++-- drivers/s390/cio/css.c | 4 ++-- drivers/s390/cio/device.c | 4 ++-- fs/partitions/check.c | 10 +++++----- include/linux/device.h | 11 ++++++++++- include/linux/kobject.h | 1 + lib/kobject_uevent.c | 7 +++++++ 11 files changed, 33 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 35094f230b1e..7af7db1ba8c4 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -977,7 +977,7 @@ static int dock_add(acpi_handle handle) sizeof(struct dock_station *)); /* we want the dock device to send uevents */ - dock_device->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&dock_device->dev, 0); if (is_dock(handle)) dock_station->flags |= DOCK_IS_DOCK; diff --git a/drivers/base/core.c b/drivers/base/core.c index a90f56f64d6f..95c67ffd71da 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -136,8 +136,6 @@ static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) if (ktype == &device_ktype) { struct device *dev = to_dev(kobj); - if (dev->uevent_suppress) - return 0; if (dev->bus) return 1; if (dev->class) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 44699d9dd85c..d3a59c688fe4 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -319,7 +319,7 @@ static int fw_register_device(struct device **dev_p, const char *fw_name, f_dev->parent = device; f_dev->class = &firmware_class; dev_set_drvdata(f_dev, fw_priv); - f_dev->uevent_suppress = 1; + dev_set_uevent_suppress(f_dev, 1); retval = device_register(f_dev); if (retval) { dev_err(device, "%s: device_register failed\n", __func__); @@ -366,7 +366,7 @@ static int fw_setup_device(struct firmware *fw, struct device **dev_p, } if (uevent) - f_dev->uevent_suppress = 0; + dev_set_uevent_suppress(f_dev, 0); *dev_p = f_dev; goto out; diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index e7d984866de0..fbb9030b68a5 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -841,7 +841,7 @@ int i2c_attach_client(struct i2c_client *client) if (client->driver && !is_newstyle_driver(client->driver)) { client->dev.release = i2c_client_release; - client->dev.uevent_suppress = 1; + dev_set_uevent_suppress(&client->dev, 1); } else client->dev.release = i2c_client_dev_release; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 0a2f2edafc03..93eca1731b81 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -84,8 +84,8 @@ static int chsc_subchannel_probe(struct subchannel *sch) kfree(private); } else { sch->private = private; - if (sch->dev.uevent_suppress) { - sch->dev.uevent_suppress = 0; + if (dev_get_uevent_suppress(&sch->dev)) { + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); } } diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 8019288bc6de..427d11d88069 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -272,7 +272,7 @@ static int css_register_subchannel(struct subchannel *sch) * the subchannel driver can decide itself when it wants to inform * userspace of its existence. */ - sch->dev.uevent_suppress = 1; + dev_set_uevent_suppress(&sch->dev, 1); css_update_ssd_info(sch); /* make it known to the system */ ret = css_sch_device_register(sch); @@ -287,7 +287,7 @@ static int css_register_subchannel(struct subchannel *sch) * a fitting driver module may be loaded based on the * modalias. */ - sch->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); } return ret; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 23d5752349b5..611d2e001dd5 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -981,7 +981,7 @@ io_subchannel_register(struct work_struct *work) * Now we know this subchannel will stay, we can throw * our delayed uevent. */ - sch->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); /* make it known to the system */ ret = ccw_device_register(cdev); @@ -1243,7 +1243,7 @@ static int io_subchannel_probe(struct subchannel *sch) * the ccw_device and exit. This happens for all early * devices, e.g. the console. */ - sch->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); cdev->dev.groups = ccwdev_attr_groups; device_initialize(&cdev->dev); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6d720243f5f4..38e337d51ced 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - pdev->uevent_suppress = 1; + dev_set_uevent_suppress(pdev, 1); err = device_add(pdev); if (err) goto out_put; @@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!p->holder_dir) goto out_del; - pdev->uevent_suppress = 0; + dev_set_uevent_suppress(pdev, 0); if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(pdev, &dev_attr_whole_disk); if (err) @@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ - if (!ddev->uevent_suppress) + if (!dev_get_uevent_suppress(pdev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); return p; @@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk) dev_set_name(ddev, disk->disk_name); /* delay uevents, until we scanned partition table */ - ddev->uevent_suppress = 1; + dev_set_uevent_suppress(ddev, 1); if (device_add(ddev)) return; @@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - ddev->uevent_suppress = 0; + dev_set_uevent_suppress(ddev, 0); kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ diff --git a/include/linux/device.h b/include/linux/device.h index 4bea53fe8f4c..914c1016dd8f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -373,7 +373,6 @@ struct device { struct device_private *p; struct kobject kobj; - unsigned uevent_suppress:1; const char *init_name; /* initial name of the device */ struct device_type *type; @@ -465,6 +464,16 @@ static inline void dev_set_drvdata(struct device *dev, void *data) dev->driver_data = data; } +static inline unsigned int dev_get_uevent_suppress(const struct device *dev) +{ + return dev->kobj.uevent_suppress; +} + +static inline void dev_set_uevent_suppress(struct device *dev, int val) +{ + dev->kobj.uevent_suppress = val; +} + static inline int device_is_registered(struct device *dev) { return dev->kobj.state_in_sysfs; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c9c214d7bba2..58ae8e00fcdd 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -68,6 +68,7 @@ struct kobject { unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; + unsigned int uevent_suppress:1; }; extern int kobject_set_name(struct kobject *kobj, const char *name, ...) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 318328ddbd1c..b2181cc8e4d8 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -118,6 +118,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, kset = top_kobj->kset; uevent_ops = kset->uevent_ops; + /* skip the event, if uevent_suppress is set*/ + if (kobj->uevent_suppress) { + pr_debug("kobject: '%s' (%p): %s: uevent_suppress " + "caused the event to drop!\n", + kobject_name(kobj), kobj, __func__); + return 0; + } /* skip the event, if the filter returns zero. */ if (uevent_ops && uevent_ops->filter) if (!uevent_ops->filter(kset, kobj)) { -- cgit v1.2.3-59-g8ed1b From f520360d93cdc37de5d972dac4bf3bdef6a7f6a7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 19 Mar 2009 09:09:05 -0700 Subject: kobject: don't block for each kobject_uevent Right now, the kobject_uevent code blocks for each uevent that's being generated, due to using (for hystoric reasons) UHM_WAIT_EXEC as flag to call_usermode_helper(). Specifically, the effect is that each uevent that is being sent causes the code to wake up keventd, then block until keventd has processed the work. Needless to say, this happens many times during the system boot. This patches changes that to UHN_NO_WAIT (brilliant name for a constant btw) so that we only schedule the work to fire the uevent message, but do not wait for keventd to process the work. This removes one of the bottlenecks during boot; each one of them is only a small effect, but the sum of them does add up. [Note, distros that need this are broken, they should be setting CONFIG_UEVENT_HELPER_PATH to "", that way this code path will never be excuted at all -- gregkh] Signed-off-by: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index b2181cc8e4d8..e68e743bd861 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -255,7 +255,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, goto exit; retval = call_usermodehelper(argv[0], argv, - env->envp, UMH_WAIT_EXEC); + env->envp, UMH_NO_WAIT); } exit: -- cgit v1.2.3-59-g8ed1b From e9d376f0fa66bd630fe27403669c6ae6c22a868f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 5 Feb 2009 11:51:38 -0500 Subject: dynamic debug: combine dprintk and dynamic printk This patch combines Greg Bank's dprintk() work with the existing dynamic printk patchset, we are now calling it 'dynamic debug'. The new feature of this patchset is a richer /debugfs control file interface, (an example output from my system is at the bottom), which allows fined grained control over the the debug output. The output can be controlled by function, file, module, format string, and line number. for example, enabled all debug messages in module 'nf_conntrack': echo -n 'module nf_conntrack +p' > /mnt/debugfs/dynamic_debug/control to disable them: echo -n 'module nf_conntrack -p' > /mnt/debugfs/dynamic_debug/control A further explanation can be found in the documentation patch. Signed-off-by: Greg Banks Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 5 - include/asm-generic/vmlinux.lds.h | 15 +- include/linux/device.h | 2 +- include/linux/dynamic_debug.h | 88 +++++ include/linux/dynamic_printk.h | 93 ----- include/linux/kernel.h | 4 +- kernel/module.c | 25 +- lib/Kconfig.debug | 2 +- lib/Makefile | 2 +- lib/dynamic_debug.c | 756 ++++++++++++++++++++++++++++++++++++ lib/dynamic_printk.c | 414 -------------------- net/netfilter/nf_conntrack_pptp.c | 2 +- scripts/Makefile.lib | 2 +- 13 files changed, 867 insertions(+), 543 deletions(-) create mode 100644 include/linux/dynamic_debug.h delete mode 100644 include/linux/dynamic_printk.h create mode 100644 lib/dynamic_debug.c delete mode 100644 lib/dynamic_printk.c (limited to 'lib') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 54f21a5c262b..3a1aa8a4affc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1816,11 +1816,6 @@ and is between 256 and 4096 characters. It is defined in the file autoconfiguration. Ranges are in pairs (memory base and size). - dynamic_printk Enables pr_debug()/dev_dbg() calls if - CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. - These can also be switched on/off via - /dynamic_printk/modules - print-fatal-signals= [KNL] debug: print fatal signals print-fatal-signals=1: print segfault info to diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f8..aca40b93bd28 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -80,6 +80,11 @@ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + /* implement dynamic printk debug */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___verbose) = .; \ + *(__verbose) \ + VMLINUX_SYMBOL(__stop___verbose) = .; \ LIKELY_PROFILE() \ BRANCH_PROFILE() @@ -309,15 +314,7 @@ CPU_DISCARD(init.data) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.data) \ - MEM_DISCARD(init.rodata) \ - /* implement dynamic printk debug */ \ - VMLINUX_SYMBOL(__start___verbose_strings) = .; \ - *(__verbose_strings) \ - VMLINUX_SYMBOL(__stop___verbose_strings) = .; \ - . = ALIGN(8); \ - VMLINUX_SYMBOL(__start___verbose) = .; \ - *(__verbose) \ - VMLINUX_SYMBOL(__stop___verbose) = .; + MEM_DISCARD(init.rodata) #define INIT_TEXT \ *(.init.text) \ diff --git a/include/linux/device.h b/include/linux/device.h index f98d0cfb4f81..2918c0e8fdfd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -582,7 +582,7 @@ extern const char *dev_driver_string(const struct device *dev); #if defined(DEBUG) #define dev_dbg(dev, format, arg...) \ dev_printk(KERN_DEBUG , dev , format , ## arg) -#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#elif defined(CONFIG_DYNAMIC_DEBUG) #define dev_dbg(dev, format, ...) do { \ dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \ } while (0) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h new file mode 100644 index 000000000000..07781aaa1164 --- /dev/null +++ b/include/linux/dynamic_debug.h @@ -0,0 +1,88 @@ +#ifndef _DYNAMIC_DEBUG_H +#define _DYNAMIC_DEBUG_H + +/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +extern long long dynamic_debug_enabled; +extern long long dynamic_debug_enabled2; + +/* + * An instance of this structure is created in a special + * ELF section at every dynamic debug callsite. At runtime, + * the special section is treated as an array of these. + */ +struct _ddebug { + /* + * These fields are used to drive the user interface + * for selecting and displaying debug callsites. + */ + const char *modname; + const char *function; + const char *filename; + const char *format; + char primary_hash; + char secondary_hash; + unsigned int lineno:24; + /* + * The flags field controls the behaviour at the callsite. + * The bits here are changed dynamically when the user + * writes commands to /dynamic_debug/ddebug + */ +#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ +#define _DPRINTK_FLAGS_DEFAULT 0 + unsigned int flags:8; +} __attribute__((aligned(8))); + + +int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *modname); + +#if defined(CONFIG_DYNAMIC_DEBUG) +extern int ddebug_remove_module(char *mod_name); + +#define __dynamic_dbg_enabled(dd) ({ \ + int __ret = 0; \ + if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ + (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ + if (unlikely(dd.flags)) \ + __ret = 1; \ + __ret; }) + +#define dynamic_pr_debug(fmt, ...) do { \ + static struct _ddebug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ + DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ + if (__dynamic_dbg_enabled(descriptor)) \ + printk(KERN_DEBUG KBUILD_MODNAME ":" fmt, \ + ##__VA_ARGS__); \ + } while (0) + + +#define dynamic_dev_dbg(dev, fmt, ...) do { \ + static struct _ddebug descriptor \ + __used \ + __attribute__((section("__verbose"), aligned(8))) = \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ + DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ + if (__dynamic_dbg_enabled(descriptor)) \ + dev_printk(KERN_DEBUG, dev, \ + KBUILD_MODNAME ": " fmt, \ + ##__VA_ARGS__); \ + } while (0) + +#else + +static inline int ddebug_remove_module(char *mod) +{ + return 0; +} + +#define dynamic_pr_debug(fmt, ...) do { } while (0) +#define dynamic_dev_dbg(dev, format, ...) do { } while (0) +#endif + +#endif diff --git a/include/linux/dynamic_printk.h b/include/linux/dynamic_printk.h deleted file mode 100644 index 2d528d009074..000000000000 --- a/include/linux/dynamic_printk.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef _DYNAMIC_PRINTK_H -#define _DYNAMIC_PRINTK_H - -#define DYNAMIC_DEBUG_HASH_BITS 6 -#define DEBUG_HASH_TABLE_SIZE (1 << DYNAMIC_DEBUG_HASH_BITS) - -#define TYPE_BOOLEAN 1 - -#define DYNAMIC_ENABLED_ALL 0 -#define DYNAMIC_ENABLED_NONE 1 -#define DYNAMIC_ENABLED_SOME 2 - -extern int dynamic_enabled; - -/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which - * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They - * use independent hash functions, to reduce the chance of false positives. - */ -extern long long dynamic_printk_enabled; -extern long long dynamic_printk_enabled2; - -struct mod_debug { - char *modname; - char *logical_modname; - char *flag_names; - int type; - int hash; - int hash2; -} __attribute__((aligned(8))); - -int register_dynamic_debug_module(char *mod_name, int type, char *share_name, - char *flags, int hash, int hash2); - -#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) -extern int unregister_dynamic_debug_module(char *mod_name); -extern int __dynamic_dbg_enabled_helper(char *modname, int type, - int value, int hash); - -#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ \ - int __ret = 0; \ - if (unlikely((dynamic_printk_enabled & (1LL << DEBUG_HASH)) && \ - (dynamic_printk_enabled2 & (1LL << DEBUG_HASH2)))) \ - __ret = __dynamic_dbg_enabled_helper(module, type, \ - value, hash);\ - __ret; }) - -#define dynamic_pr_debug(fmt, ...) do { \ - static char mod_name[] \ - __attribute__((section("__verbose_strings"))) \ - = KBUILD_MODNAME; \ - static struct mod_debug descriptor \ - __used \ - __attribute__((section("__verbose"), aligned(8))) = \ - { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ - if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ - 0, 0, DEBUG_HASH)) \ - printk(KERN_DEBUG KBUILD_MODNAME ":" fmt, \ - ##__VA_ARGS__); \ - } while (0) - -#define dynamic_dev_dbg(dev, format, ...) do { \ - static char mod_name[] \ - __attribute__((section("__verbose_strings"))) \ - = KBUILD_MODNAME; \ - static struct mod_debug descriptor \ - __used \ - __attribute__((section("__verbose"), aligned(8))) = \ - { mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\ - if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN, \ - 0, 0, DEBUG_HASH)) \ - dev_printk(KERN_DEBUG, dev, \ - KBUILD_MODNAME ": " format, \ - ##__VA_ARGS__); \ - } while (0) - -#else - -static inline int unregister_dynamic_debug_module(const char *mod_name) -{ - return 0; -} -static inline int __dynamic_dbg_enabled_helper(char *modname, int type, - int value, int hash) -{ - return 0; -} - -#define __dynamic_dbg_enabled(module, type, value, level, hash) ({ 0; }) -#define dynamic_pr_debug(fmt, ...) do { } while (0) -#define dynamic_dev_dbg(dev, format, ...) do { } while (0) -#endif - -#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7fa371898e3e..b5496ecbec71 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -358,7 +358,7 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #if defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) -#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#elif defined(CONFIG_DYNAMIC_DEBUG) #define pr_debug(fmt, ...) do { \ dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) diff --git a/kernel/module.c b/kernel/module.c index 1196f5d11700..77672233387f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -822,7 +822,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, mutex_lock(&module_mutex); /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); - unregister_dynamic_debug_module(mod->name); + ddebug_remove_module(mod->name); free_module(mod); out: @@ -1827,19 +1827,13 @@ static inline void add_kallsyms(struct module *mod, } #endif /* CONFIG_KALLSYMS */ -static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num) +static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) { -#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG - unsigned int i; - - for (i = 0; i < num; i++) { - register_dynamic_debug_module(debug[i].modname, - debug[i].type, - debug[i].logical_modname, - debug[i].flag_names, - debug[i].hash, debug[i].hash2); - } -#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */ +#ifdef CONFIG_DYNAMIC_DEBUG + if (ddebug_add_module(debug, num, debug->modname)) + printk(KERN_ERR "dynamic debug error adding module: %s\n", + debug->modname); +#endif } static void *module_alloc_update_bounds(unsigned long size) @@ -2213,12 +2207,13 @@ static noinline struct module *load_module(void __user *umod, add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); if (!mod->taints) { - struct mod_debug *debug; + struct _ddebug *debug; unsigned int num_debug; debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); - dynamic_printk_setup(debug, num_debug); + if (debug) + dynamic_debug_setup(debug, num_debug); } /* sechdrs[0].sh_size is always zero */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1bcf9cd4baa0..0dd1c04c7323 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -847,7 +847,7 @@ config BUILD_DOCSRC Say N if you are unsure. -config DYNAMIC_PRINTK_DEBUG +config DYNAMIC_DEBUG bool "Enable dynamic printk() call support" default n depends on PRINTK diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded27..8633d6be9d21 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -82,7 +82,7 @@ obj-$(CONFIG_HAVE_LMB) += lmb.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o -obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c new file mode 100644 index 000000000000..9e123ae326bc --- /dev/null +++ b/lib/dynamic_debug.c @@ -0,0 +1,756 @@ +/* + * lib/dynamic_debug.c + * + * make pr_debug()/dev_dbg() calls runtime configurable based upon their + * source module. + * + * Copyright (C) 2008 Jason Baron + * By Greg Banks + * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct _ddebug __start___verbose[]; +extern struct _ddebug __stop___verbose[]; + +/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +long long dynamic_debug_enabled; +EXPORT_SYMBOL_GPL(dynamic_debug_enabled); +long long dynamic_debug_enabled2; +EXPORT_SYMBOL_GPL(dynamic_debug_enabled2); + +struct ddebug_table { + struct list_head link; + char *mod_name; + unsigned int num_ddebugs; + unsigned int num_enabled; + struct _ddebug *ddebugs; +}; + +struct ddebug_query { + const char *filename; + const char *module; + const char *function; + const char *format; + unsigned int first_lineno, last_lineno; +}; + +struct ddebug_iter { + struct ddebug_table *table; + unsigned int idx; +}; + +static DEFINE_MUTEX(ddebug_lock); +static LIST_HEAD(ddebug_tables); +static int verbose = 0; + +/* Return the last part of a pathname */ +static inline const char *basename(const char *path) +{ + const char *tail = strrchr(path, '/'); + return tail ? tail+1 : path; +} + +/* format a string into buf[] which describes the _ddebug's flags */ +static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, + size_t maxlen) +{ + char *p = buf; + + BUG_ON(maxlen < 4); + if (dp->flags & _DPRINTK_FLAGS_PRINT) + *p++ = 'p'; + if (p == buf) + *p++ = '-'; + *p = '\0'; + + return buf; +} + +/* + * must be called with ddebug_lock held + */ + +static int disabled_hash(char hash, bool first_table) +{ + struct ddebug_table *dt; + char table_hash_value; + + list_for_each_entry(dt, &ddebug_tables, link) { + if (first_table) + table_hash_value = dt->ddebugs->primary_hash; + else + table_hash_value = dt->ddebugs->secondary_hash; + if (dt->num_enabled && (hash == table_hash_value)) + return 0; + } + return 1; +} + +/* + * Search the tables for _ddebug's which match the given + * `query' and apply the `flags' and `mask' to them. Tells + * the user which ddebug's were changed, or whether none + * were matched. + */ +static void ddebug_change(const struct ddebug_query *query, + unsigned int flags, unsigned int mask) +{ + int i; + struct ddebug_table *dt; + unsigned int newflags; + unsigned int nfound = 0; + char flagbuf[8]; + + /* search for matching ddebugs */ + mutex_lock(&ddebug_lock); + list_for_each_entry(dt, &ddebug_tables, link) { + + /* match against the module name */ + if (query->module != NULL && + strcmp(query->module, dt->mod_name)) + continue; + + for (i = 0 ; i < dt->num_ddebugs ; i++) { + struct _ddebug *dp = &dt->ddebugs[i]; + + /* match against the source filename */ + if (query->filename != NULL && + strcmp(query->filename, dp->filename) && + strcmp(query->filename, basename(dp->filename))) + continue; + + /* match against the function */ + if (query->function != NULL && + strcmp(query->function, dp->function)) + continue; + + /* match against the format */ + if (query->format != NULL && + strstr(dp->format, query->format) == NULL) + continue; + + /* match against the line number range */ + if (query->first_lineno && + dp->lineno < query->first_lineno) + continue; + if (query->last_lineno && + dp->lineno > query->last_lineno) + continue; + + nfound++; + + newflags = (dp->flags & mask) | flags; + if (newflags == dp->flags) + continue; + + if (!newflags) + dt->num_enabled--; + else if (!dp-flags) + dt->num_enabled++; + dp->flags = newflags; + if (newflags) { + dynamic_debug_enabled |= + (1LL << dp->primary_hash); + dynamic_debug_enabled2 |= + (1LL << dp->secondary_hash); + } else { + if (disabled_hash(dp->primary_hash, true)) + dynamic_debug_enabled &= + ~(1LL << dp->primary_hash); + if (disabled_hash(dp->secondary_hash, false)) + dynamic_debug_enabled2 &= + ~(1LL << dp->secondary_hash); + } + if (verbose) + printk(KERN_INFO + "ddebug: changed %s:%d [%s]%s %s\n", + dp->filename, dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); + } + } + mutex_unlock(&ddebug_lock); + + if (!nfound && verbose) + printk(KERN_INFO "ddebug: no matches for query\n"); +} + +/* + * Wrapper around strsep() to collapse the multiple empty tokens + * that it returns when fed sequences of separator characters. + * Now, if we had strtok_r()... + */ +static inline char *nearly_strtok_r(char **p, const char *sep) +{ + char *r; + + while ((r = strsep(p, sep)) != NULL && *r == '\0') + ; + return r; +} + +/* + * Split the buffer `buf' into space-separated words. + * Return the number of such words or <0 on error. + */ +static int ddebug_tokenize(char *buf, char *words[], int maxwords) +{ + int nwords = 0; + + while (nwords < maxwords && + (words[nwords] = nearly_strtok_r(&buf, " \t\r\n")) != NULL) + nwords++; + if (buf) + return -EINVAL; /* ran out of words[] before bytes */ + + if (verbose) { + int i; + printk(KERN_INFO "%s: split into words:", __func__); + for (i = 0 ; i < nwords ; i++) + printk(" \"%s\"", words[i]); + printk("\n"); + } + + return nwords; +} + +/* + * Parse a single line number. Note that the empty string "" + * is treated as a special case and converted to zero, which + * is later treated as a "don't care" value. + */ +static inline int parse_lineno(const char *str, unsigned int *val) +{ + char *end = NULL; + BUG_ON(str == NULL); + if (*str == '\0') { + *val = 0; + return 0; + } + *val = simple_strtoul(str, &end, 10); + return end == NULL || end == str || *end != '\0' ? -EINVAL : 0; +} + +/* + * Undo octal escaping in a string, inplace. This is useful to + * allow the user to express a query which matches a format + * containing embedded spaces. + */ +#define isodigit(c) ((c) >= '0' && (c) <= '7') +static char *unescape(char *str) +{ + char *in = str; + char *out = str; + + while (*in) { + if (*in == '\\') { + if (in[1] == '\\') { + *out++ = '\\'; + in += 2; + continue; + } else if (in[1] == 't') { + *out++ = '\t'; + in += 2; + continue; + } else if (in[1] == 'n') { + *out++ = '\n'; + in += 2; + continue; + } else if (isodigit(in[1]) && + isodigit(in[2]) && + isodigit(in[3])) { + *out++ = ((in[1] - '0')<<6) | + ((in[2] - '0')<<3) | + (in[3] - '0'); + in += 4; + continue; + } + } + *out++ = *in++; + } + *out = '\0'; + + return str; +} + +/* + * Parse words[] as a ddebug query specification, which is a series + * of (keyword, value) pairs chosen from these possibilities: + * + * func + * file + * file + * module + * format + * line + * line - // where either may be empty + */ +static int ddebug_parse_query(char *words[], int nwords, + struct ddebug_query *query) +{ + unsigned int i; + + /* check we have an even number of words */ + if (nwords % 2 != 0) + return -EINVAL; + memset(query, 0, sizeof(*query)); + + for (i = 0 ; i < nwords ; i += 2) { + if (!strcmp(words[i], "func")) + query->function = words[i+1]; + else if (!strcmp(words[i], "file")) + query->filename = words[i+1]; + else if (!strcmp(words[i], "module")) + query->module = words[i+1]; + else if (!strcmp(words[i], "format")) + query->format = unescape(words[i+1]); + else if (!strcmp(words[i], "line")) { + char *first = words[i+1]; + char *last = strchr(first, '-'); + if (last) + *last++ = '\0'; + if (parse_lineno(first, &query->first_lineno) < 0) + return -EINVAL; + if (last != NULL) { + /* range - */ + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + } else { + query->last_lineno = query->first_lineno; + } + } else { + if (verbose) + printk(KERN_ERR "%s: unknown keyword \"%s\"\n", + __func__, words[i]); + return -EINVAL; + } + } + + if (verbose) + printk(KERN_INFO "%s: q->function=\"%s\" q->filename=\"%s\" " + "q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n", + __func__, query->function, query->filename, + query->module, query->format, query->first_lineno, + query->last_lineno); + + return 0; +} + +/* + * Parse `str' as a flags specification, format [-+=][p]+. + * Sets up *maskp and *flagsp to be used when changing the + * flags fields of matched _ddebug's. Returns 0 on success + * or <0 on error. + */ +static int ddebug_parse_flags(const char *str, unsigned int *flagsp, + unsigned int *maskp) +{ + unsigned flags = 0; + int op = '='; + + switch (*str) { + case '+': + case '-': + case '=': + op = *str++; + break; + default: + return -EINVAL; + } + if (verbose) + printk(KERN_INFO "%s: op='%c'\n", __func__, op); + + for ( ; *str ; ++str) { + switch (*str) { + case 'p': + flags |= _DPRINTK_FLAGS_PRINT; + break; + default: + return -EINVAL; + } + } + if (flags == 0) + return -EINVAL; + if (verbose) + printk(KERN_INFO "%s: flags=0x%x\n", __func__, flags); + + /* calculate final *flagsp, *maskp according to mask and op */ + switch (op) { + case '=': + *maskp = 0; + *flagsp = flags; + break; + case '+': + *maskp = ~0U; + *flagsp = flags; + break; + case '-': + *maskp = ~flags; + *flagsp = 0; + break; + } + if (verbose) + printk(KERN_INFO "%s: *flagsp=0x%x *maskp=0x%x\n", + __func__, *flagsp, *maskp); + return 0; +} + +/* + * File_ops->write method for /dynamic_debug/conrol. Gathers the + * command text from userspace, parses and executes it. + */ +static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + unsigned int flags = 0, mask = 0; + struct ddebug_query query; +#define MAXWORDS 9 + int nwords; + char *words[MAXWORDS]; + char tmpbuf[256]; + + if (len == 0) + return 0; + /* we don't check *offp -- multiple writes() are allowed */ + if (len > sizeof(tmpbuf)-1) + return -E2BIG; + if (copy_from_user(tmpbuf, ubuf, len)) + return -EFAULT; + tmpbuf[len] = '\0'; + if (verbose) + printk(KERN_INFO "%s: read %d bytes from userspace\n", + __func__, (int)len); + + nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); + if (nwords < 0) + return -EINVAL; + if (ddebug_parse_query(words, nwords-1, &query)) + return -EINVAL; + if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) + return -EINVAL; + + /* actually go and implement the change */ + ddebug_change(&query, flags, mask); + + *offp += len; + return len; +} + +/* + * Set the iterator to point to the first _ddebug object + * and return a pointer to that first object. Returns + * NULL if there are no _ddebugs at all. + */ +static struct _ddebug *ddebug_iter_first(struct ddebug_iter *iter) +{ + if (list_empty(&ddebug_tables)) { + iter->table = NULL; + iter->idx = 0; + return NULL; + } + iter->table = list_entry(ddebug_tables.next, + struct ddebug_table, link); + iter->idx = 0; + return &iter->table->ddebugs[iter->idx]; +} + +/* + * Advance the iterator to point to the next _ddebug + * object from the one the iterator currently points at, + * and returns a pointer to the new _ddebug. Returns + * NULL if the iterator has seen all the _ddebugs. + */ +static struct _ddebug *ddebug_iter_next(struct ddebug_iter *iter) +{ + if (iter->table == NULL) + return NULL; + if (++iter->idx == iter->table->num_ddebugs) { + /* iterate to next table */ + iter->idx = 0; + if (list_is_last(&iter->table->link, &ddebug_tables)) { + iter->table = NULL; + return NULL; + } + iter->table = list_entry(iter->table->link.next, + struct ddebug_table, link); + } + return &iter->table->ddebugs[iter->idx]; +} + +/* + * Seq_ops start method. Called at the start of every + * read() call from userspace. Takes the ddebug_lock and + * seeks the seq_file's iterator to the given position. + */ +static void *ddebug_proc_start(struct seq_file *m, loff_t *pos) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp; + int n = *pos; + + if (verbose) + printk(KERN_INFO "%s: called m=%p *pos=%lld\n", + __func__, m, (unsigned long long)*pos); + + mutex_lock(&ddebug_lock); + + if (!n) + return SEQ_START_TOKEN; + if (n < 0) + return NULL; + dp = ddebug_iter_first(iter); + while (dp != NULL && --n > 0) + dp = ddebug_iter_next(iter); + return dp; +} + +/* + * Seq_ops next method. Called several times within a read() + * call from userspace, with ddebug_lock held. Walks to the + * next _ddebug object with a special case for the header line. + */ +static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp; + + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p *pos=%lld\n", + __func__, m, p, (unsigned long long)*pos); + + if (p == SEQ_START_TOKEN) + dp = ddebug_iter_first(iter); + else + dp = ddebug_iter_next(iter); + ++*pos; + return dp; +} + +/* + * Seq_ops show method. Called several times within a read() + * call from userspace, with ddebug_lock held. Formats the + * current _ddebug as a single human-readable line, with a + * special case for the header line. + */ +static int ddebug_proc_show(struct seq_file *m, void *p) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp = p; + char flagsbuf[8]; + + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p\n", + __func__, m, p); + + if (p == SEQ_START_TOKEN) { + seq_puts(m, + "# filename:lineno [module]function flags format\n"); + return 0; + } + + seq_printf(m, "%s:%u [%s]%s %s \"", + dp->filename, dp->lineno, + iter->table->mod_name, dp->function, + ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + seq_escape(m, dp->format, "\t\r\n\""); + seq_puts(m, "\"\n"); + + return 0; +} + +/* + * Seq_ops stop method. Called at the end of each read() + * call from userspace. Drops ddebug_lock. + */ +static void ddebug_proc_stop(struct seq_file *m, void *p) +{ + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p\n", + __func__, m, p); + mutex_unlock(&ddebug_lock); +} + +static const struct seq_operations ddebug_proc_seqops = { + .start = ddebug_proc_start, + .next = ddebug_proc_next, + .show = ddebug_proc_show, + .stop = ddebug_proc_stop +}; + +/* + * File_ops->open method for /dynamic_debug/control. Does the seq_file + * setup dance, and also creates an iterator to walk the _ddebugs. + * Note that we create a seq_file always, even for O_WRONLY files + * where it's not needed, as doing so simplifies the ->release method. + */ +static int ddebug_proc_open(struct inode *inode, struct file *file) +{ + struct ddebug_iter *iter; + int err; + + if (verbose) + printk(KERN_INFO "%s: called\n", __func__); + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (iter == NULL) + return -ENOMEM; + + err = seq_open(file, &ddebug_proc_seqops); + if (err) { + kfree(iter); + return err; + } + ((struct seq_file *) file->private_data)->private = iter; + return 0; +} + +static const struct file_operations ddebug_proc_fops = { + .owner = THIS_MODULE, + .open = ddebug_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, + .write = ddebug_proc_write +}; + +/* + * Allocate a new ddebug_table for the given module + * and add it to the global list. + */ +int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *name) +{ + struct ddebug_table *dt; + char *new_name; + + dt = kzalloc(sizeof(*dt), GFP_KERNEL); + if (dt == NULL) + return -ENOMEM; + new_name = kstrdup(name, GFP_KERNEL); + if (new_name == NULL) { + kfree(dt); + return -ENOMEM; + } + dt->mod_name = new_name; + dt->num_ddebugs = n; + dt->num_enabled = 0; + dt->ddebugs = tab; + + mutex_lock(&ddebug_lock); + list_add_tail(&dt->link, &ddebug_tables); + mutex_unlock(&ddebug_lock); + + if (verbose) + printk(KERN_INFO "%u debug prints in module %s\n", + n, dt->mod_name); + return 0; +} +EXPORT_SYMBOL_GPL(ddebug_add_module); + +static void ddebug_table_free(struct ddebug_table *dt) +{ + list_del_init(&dt->link); + kfree(dt->mod_name); + kfree(dt); +} + +/* + * Called in response to a module being unloaded. Removes + * any ddebug_table's which point at the module. + */ +int ddebug_remove_module(char *mod_name) +{ + struct ddebug_table *dt, *nextdt; + int ret = -ENOENT; + + if (verbose) + printk(KERN_INFO "%s: removing module \"%s\"\n", + __func__, mod_name); + + mutex_lock(&ddebug_lock); + list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { + if (!strcmp(dt->mod_name, mod_name)) { + ddebug_table_free(dt); + ret = 0; + } + } + mutex_unlock(&ddebug_lock); + return ret; +} +EXPORT_SYMBOL_GPL(ddebug_remove_module); + +static void ddebug_remove_all_tables(void) +{ + mutex_lock(&ddebug_lock); + while (!list_empty(&ddebug_tables)) { + struct ddebug_table *dt = list_entry(ddebug_tables.next, + struct ddebug_table, + link); + ddebug_table_free(dt); + } + mutex_unlock(&ddebug_lock); +} + +static int __init dynamic_debug_init(void) +{ + struct dentry *dir, *file; + struct _ddebug *iter, *iter_start; + const char *modname = NULL; + int ret = 0; + int n = 0; + + dir = debugfs_create_dir("dynamic_debug", NULL); + if (!dir) + return -ENOMEM; + file = debugfs_create_file("control", 0644, dir, NULL, + &ddebug_proc_fops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + if (__start___verbose != __stop___verbose) { + iter = __start___verbose; + modname = iter->modname; + iter_start = iter; + for (; iter < __stop___verbose; iter++) { + if (strcmp(modname, iter->modname)) { + ret = ddebug_add_module(iter_start, n, modname); + if (ret) + goto out_free; + n = 0; + modname = iter->modname; + iter_start = iter; + } + n++; + } + ret = ddebug_add_module(iter_start, n, modname); + } +out_free: + if (ret) { + ddebug_remove_all_tables(); + debugfs_remove(dir); + debugfs_remove(file); + } + return 0; +} +module_init(dynamic_debug_init); diff --git a/lib/dynamic_printk.c b/lib/dynamic_printk.c deleted file mode 100644 index 165a19763dc9..000000000000 --- a/lib/dynamic_printk.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * lib/dynamic_printk.c - * - * make pr_debug()/dev_dbg() calls runtime configurable based upon their - * their source module. - * - * Copyright (C) 2008 Red Hat, Inc., Jason Baron - */ - -#include -#include -#include -#include -#include -#include - -extern struct mod_debug __start___verbose[]; -extern struct mod_debug __stop___verbose[]; - -struct debug_name { - struct hlist_node hlist; - struct hlist_node hlist2; - int hash1; - int hash2; - char *name; - int enable; - int type; -}; - -static int nr_entries; -static int num_enabled; -int dynamic_enabled = DYNAMIC_ENABLED_NONE; -static struct hlist_head module_table[DEBUG_HASH_TABLE_SIZE] = - { [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT }; -static struct hlist_head module_table2[DEBUG_HASH_TABLE_SIZE] = - { [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT }; -static DECLARE_MUTEX(debug_list_mutex); - -/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which - * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They - * use independent hash functions, to reduce the chance of false positives. - */ -long long dynamic_printk_enabled; -EXPORT_SYMBOL_GPL(dynamic_printk_enabled); -long long dynamic_printk_enabled2; -EXPORT_SYMBOL_GPL(dynamic_printk_enabled2); - -/* returns the debug module pointer. */ -static struct debug_name *find_debug_module(char *module_name) -{ - int i; - struct hlist_head *head; - struct hlist_node *node; - struct debug_name *element; - - element = NULL; - for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) { - head = &module_table[i]; - hlist_for_each_entry_rcu(element, node, head, hlist) - if (!strcmp(element->name, module_name)) - return element; - } - return NULL; -} - -/* returns the debug module pointer. */ -static struct debug_name *find_debug_module_hash(char *module_name, int hash) -{ - struct hlist_head *head; - struct hlist_node *node; - struct debug_name *element; - - element = NULL; - head = &module_table[hash]; - hlist_for_each_entry_rcu(element, node, head, hlist) - if (!strcmp(element->name, module_name)) - return element; - return NULL; -} - -/* caller must hold mutex*/ -static int __add_debug_module(char *mod_name, int hash, int hash2) -{ - struct debug_name *new; - char *module_name; - int ret = 0; - - if (find_debug_module(mod_name)) { - ret = -EINVAL; - goto out; - } - module_name = kmalloc(strlen(mod_name) + 1, GFP_KERNEL); - if (!module_name) { - ret = -ENOMEM; - goto out; - } - module_name = strcpy(module_name, mod_name); - module_name[strlen(mod_name)] = '\0'; - new = kzalloc(sizeof(struct debug_name), GFP_KERNEL); - if (!new) { - kfree(module_name); - ret = -ENOMEM; - goto out; - } - INIT_HLIST_NODE(&new->hlist); - INIT_HLIST_NODE(&new->hlist2); - new->name = module_name; - new->hash1 = hash; - new->hash2 = hash2; - hlist_add_head_rcu(&new->hlist, &module_table[hash]); - hlist_add_head_rcu(&new->hlist2, &module_table2[hash2]); - nr_entries++; -out: - return ret; -} - -int unregister_dynamic_debug_module(char *mod_name) -{ - struct debug_name *element; - int ret = 0; - - down(&debug_list_mutex); - element = find_debug_module(mod_name); - if (!element) { - ret = -EINVAL; - goto out; - } - hlist_del_rcu(&element->hlist); - hlist_del_rcu(&element->hlist2); - synchronize_rcu(); - kfree(element->name); - if (element->enable) - num_enabled--; - kfree(element); - nr_entries--; -out: - up(&debug_list_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(unregister_dynamic_debug_module); - -int register_dynamic_debug_module(char *mod_name, int type, char *share_name, - char *flags, int hash, int hash2) -{ - struct debug_name *elem; - int ret = 0; - - down(&debug_list_mutex); - elem = find_debug_module(mod_name); - if (!elem) { - if (__add_debug_module(mod_name, hash, hash2)) - goto out; - elem = find_debug_module(mod_name); - if (dynamic_enabled == DYNAMIC_ENABLED_ALL && - !strcmp(mod_name, share_name)) { - elem->enable = true; - num_enabled++; - } - } - elem->type |= type; -out: - up(&debug_list_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(register_dynamic_debug_module); - -int __dynamic_dbg_enabled_helper(char *mod_name, int type, int value, int hash) -{ - struct debug_name *elem; - int ret = 0; - - if (dynamic_enabled == DYNAMIC_ENABLED_ALL) - return 1; - rcu_read_lock(); - elem = find_debug_module_hash(mod_name, hash); - if (elem && elem->enable) - ret = 1; - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(__dynamic_dbg_enabled_helper); - -static void set_all(bool enable) -{ - struct debug_name *e; - struct hlist_node *node; - int i; - long long enable_mask; - - for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) { - if (module_table[i].first != NULL) { - hlist_for_each_entry(e, node, &module_table[i], hlist) { - e->enable = enable; - } - } - } - if (enable) - enable_mask = ULLONG_MAX; - else - enable_mask = 0; - dynamic_printk_enabled = enable_mask; - dynamic_printk_enabled2 = enable_mask; -} - -static int disabled_hash(int i, bool first_table) -{ - struct debug_name *e; - struct hlist_node *node; - - if (first_table) { - hlist_for_each_entry(e, node, &module_table[i], hlist) { - if (e->enable) - return 0; - } - } else { - hlist_for_each_entry(e, node, &module_table2[i], hlist2) { - if (e->enable) - return 0; - } - } - return 1; -} - -static ssize_t pr_debug_write(struct file *file, const char __user *buf, - size_t length, loff_t *ppos) -{ - char *buffer, *s, *value_str, *setting_str; - int err, value; - struct debug_name *elem = NULL; - int all = 0; - - if (length > PAGE_SIZE || length < 0) - return -EINVAL; - - buffer = (char *)__get_free_page(GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - err = -EFAULT; - if (copy_from_user(buffer, buf, length)) - goto out; - - err = -EINVAL; - if (length < PAGE_SIZE) - buffer[length] = '\0'; - else if (buffer[PAGE_SIZE-1]) - goto out; - - err = -EINVAL; - down(&debug_list_mutex); - - if (strncmp("set", buffer, 3)) - goto out_up; - s = buffer + 3; - setting_str = strsep(&s, "="); - if (s == NULL) - goto out_up; - setting_str = strstrip(setting_str); - value_str = strsep(&s, " "); - if (s == NULL) - goto out_up; - s = strstrip(s); - if (!strncmp(s, "all", 3)) - all = 1; - else - elem = find_debug_module(s); - if (!strncmp(setting_str, "enable", 6)) { - value = !!simple_strtol(value_str, NULL, 10); - if (all) { - if (value) { - set_all(true); - num_enabled = nr_entries; - dynamic_enabled = DYNAMIC_ENABLED_ALL; - } else { - set_all(false); - num_enabled = 0; - dynamic_enabled = DYNAMIC_ENABLED_NONE; - } - err = 0; - } else if (elem) { - if (value && (elem->enable == 0)) { - dynamic_printk_enabled |= (1LL << elem->hash1); - dynamic_printk_enabled2 |= (1LL << elem->hash2); - elem->enable = 1; - num_enabled++; - dynamic_enabled = DYNAMIC_ENABLED_SOME; - err = 0; - printk(KERN_DEBUG - "debugging enabled for module %s\n", - elem->name); - } else if (!value && (elem->enable == 1)) { - elem->enable = 0; - num_enabled--; - if (disabled_hash(elem->hash1, true)) - dynamic_printk_enabled &= - ~(1LL << elem->hash1); - if (disabled_hash(elem->hash2, false)) - dynamic_printk_enabled2 &= - ~(1LL << elem->hash2); - if (num_enabled) - dynamic_enabled = DYNAMIC_ENABLED_SOME; - else - dynamic_enabled = DYNAMIC_ENABLED_NONE; - err = 0; - printk(KERN_DEBUG - "debugging disabled for module %s\n", - elem->name); - } - } - } - if (!err) - err = length; -out_up: - up(&debug_list_mutex); -out: - free_page((unsigned long)buffer); - return err; -} - -static void *pr_debug_seq_start(struct seq_file *f, loff_t *pos) -{ - return (*pos < DEBUG_HASH_TABLE_SIZE) ? pos : NULL; -} - -static void *pr_debug_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - if (*pos >= DEBUG_HASH_TABLE_SIZE) - return NULL; - return pos; -} - -static void pr_debug_seq_stop(struct seq_file *s, void *v) -{ - /* Nothing to do */ -} - -static int pr_debug_seq_show(struct seq_file *s, void *v) -{ - struct hlist_head *head; - struct hlist_node *node; - struct debug_name *elem; - unsigned int i = *(loff_t *) v; - - rcu_read_lock(); - head = &module_table[i]; - hlist_for_each_entry_rcu(elem, node, head, hlist) { - seq_printf(s, "%s enabled=%d", elem->name, elem->enable); - seq_printf(s, "\n"); - } - rcu_read_unlock(); - return 0; -} - -static struct seq_operations pr_debug_seq_ops = { - .start = pr_debug_seq_start, - .next = pr_debug_seq_next, - .stop = pr_debug_seq_stop, - .show = pr_debug_seq_show -}; - -static int pr_debug_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &pr_debug_seq_ops); -} - -static const struct file_operations pr_debug_operations = { - .open = pr_debug_open, - .read = seq_read, - .write = pr_debug_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int __init dynamic_printk_init(void) -{ - struct dentry *dir, *file; - struct mod_debug *iter; - unsigned long value; - - dir = debugfs_create_dir("dynamic_printk", NULL); - if (!dir) - return -ENOMEM; - file = debugfs_create_file("modules", 0644, dir, NULL, - &pr_debug_operations); - if (!file) { - debugfs_remove(dir); - return -ENOMEM; - } - for (value = (unsigned long)__start___verbose; - value < (unsigned long)__stop___verbose; - value += sizeof(struct mod_debug)) { - iter = (struct mod_debug *)value; - register_dynamic_debug_module(iter->modname, - iter->type, - iter->logical_modname, - iter->flag_names, iter->hash, iter->hash2); - } - if (dynamic_enabled == DYNAMIC_ENABLED_ALL) - set_all(true); - return 0; -} -module_init(dynamic_printk_init); -/* may want to move this earlier so we can get traces as early as possible */ - -static int __init dynamic_printk_setup(char *str) -{ - if (str) - return -ENOENT; - dynamic_enabled = DYNAMIC_ENABLED_ALL; - return 0; -} -/* Use early_param(), so we can get debug output as early as possible */ -early_param("dynamic_printk", dynamic_printk_setup); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 9e169ef2e854..12bd09dbd36c 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -66,7 +66,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) /* PptpControlMessageType names */ const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index e06365775bdf..c18fa150b6fe 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -97,7 +97,7 @@ modname_flags = $(if $(filter 1,$(words $(modname))),\ -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") #hash values -ifdef CONFIG_DYNAMIC_PRINTK_DEBUG +ifdef CONFIG_DYNAMIC_DEBUG debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\ -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))" else -- cgit v1.2.3-59-g8ed1b From 86151fdf38b3795f292b39defbff39d2684b9c8c Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 5 Feb 2009 11:53:15 -0500 Subject: dynamic debug: update docs updates the documentation for 'dynamic debug' feature. Signed-off-by: Greg Banks Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/dynamic-debug-howto.txt | 232 ++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 72 ++++++----- 2 files changed, 273 insertions(+), 31 deletions(-) create mode 100644 Documentation/dynamic-debug-howto.txt (limited to 'lib') diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt new file mode 100644 index 000000000000..68394825e86b --- /dev/null +++ b/Documentation/dynamic-debug-howto.txt @@ -0,0 +1,232 @@ + +Introduction +============ + +This document describes how to use the dynamic debug (ddebug) feature. + +Dynamic debug is designed to allow you to dynamically enable/disable kernel +code to obtain additional kernel information. Currently, if +CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_debug() calls can be +dynamically enabled per-callsite. + +Dynamic debug has even more useful features: + + * Simple query language allows turning on and off debugging statements by + matching any combination of: + + - source filename + - function name + - line number (including ranges of line numbers) + - module name + - format string + + * Provides a debugfs control file: /dynamic_debug/control which can be + read to display the complete list of known debug statements, to help guide you + +Controlling dynamic debug Behaviour +=============================== + +The behaviour of pr_debug()/dev_debug()s are controlled via writing to a +control file in the 'debugfs' filesystem. Thus, you must first mount the debugfs +filesystem, in order to make use of this feature. Subsequently, we refer to the +control file as: /dynamic_debug/control. For example, if you want to +enable printing from source file 'svcsock.c', line 1603 you simply do: + +nullarbor:~ # echo 'file svcsock.c line 1603 +p' > + /dynamic_debug/control + +If you make a mistake with the syntax, the write will fail thus: + +nullarbor:~ # echo 'file svcsock.c wtf 1 +p' > + /dynamic_debug/control +-bash: echo: write error: Invalid argument + +Viewing Dynamic Debug Behaviour +=========================== + +You can view the currently configured behaviour of all the debug statements +via: + +nullarbor:~ # cat /dynamic_debug/control +# filename:lineno [module]function flags format +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA\040Module\040Removed,\040deregister\040RPC\040RDMA\040transport\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline\040\040\040\040\040\040\040:\040%d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth\040\040\040\040\040\040\040\040\040:\040%d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests\040\040\040\040\040:\040%d\012" +... + + +You can also apply standard Unix text manipulation filters to this +data, e.g. + +nullarbor:~ # grep -i rdma /dynamic_debug/control | wc -l +62 + +nullarbor:~ # grep -i tcp /dynamic_debug/control | wc -l +42 + +Note in particular that the third column shows the enabled behaviour +flags for each debug statement callsite (see below for definitions of the +flags). The default value, no extra behaviour enabled, is "-". So +you can view all the debug statement callsites with any non-default flags: + +nullarbor:~ # awk '$3 != "-"' /dynamic_debug/control +# filename:lineno [module]function flags format +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process:\040st_sendto\040returned\040%d\012" + + +Command Language Reference +========================== + +At the lexical level, a command comprises a sequence of words separated +by whitespace characters. Note that newlines are treated as word +separators and do *not* end a command or allow multiple commands to +be done together. So these are all equivalent: + +nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' > + /dynamic_debug/control +nullarbor:~ # echo -c ' file svcsock.c line 1603 +p ' > + /dynamic_debug/control +nullarbor:~ # echo -c 'file svcsock.c\nline 1603 +p' > + /dynamic_debug/control +nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > + /dynamic_debug/control + +Commands are bounded by a write() system call. If you want to do +multiple commands you need to do a separate "echo" for each, like: + +nullarbor:~ # echo 'file svcsock.c line 1603 +p' > /proc/dprintk ;\ +> echo 'file svcsock.c line 1563 +p' > /proc/dprintk + +or even like: + +nullarbor:~ # ( +> echo 'file svcsock.c line 1603 +p' ;\ +> echo 'file svcsock.c line 1563 +p' ;\ +> ) > /proc/dprintk + +At the syntactical level, a command comprises a sequence of match +specifications, followed by a flags change specification. + +command ::= match-spec* flags-spec + +The match-spec's are used to choose a subset of the known dprintk() +callsites to which to apply the flags-spec. Think of them as a query +with implicit ANDs between each pair. Note that an empty list of +match-specs is possible, but is not very useful because it will not +match any debug statement callsites. + +A match specification comprises a keyword, which controls the attribute +of the callsite to be compared, and a value to compare against. Possible +keywords are: + +match-spec ::= 'func' string | + 'file' string | + 'module' string | + 'format' string | + 'line' line-range + +line-range ::= lineno | + '-'lineno | + lineno'-' | + lineno'-'lineno +// Note: line-range cannot contain space, e.g. +// "1-30" is valid range but "1 - 30" is not. + +lineno ::= unsigned-int + +The meanings of each keyword are: + +func + The given string is compared against the function name + of each callsite. Example: + + func svc_tcp_accept + +file + The given string is compared against either the full + pathname or the basename of the source file of each + callsite. Examples: + + file svcsock.c + file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c + +module + The given string is compared against the module name + of each callsite. The module name is the string as + seen in "lsmod", i.e. without the directory or the .ko + suffix and with '-' changed to '_'. Examples: + + module sunrpc + module nfsd + +format + The given string is searched for in the dynamic debug format + string. Note that the string does not need to match the + entire format, only some part. Whitespace and other + special characters can be escaped using C octal character + escape \ooo notation, e.g. the space character is \040. + Examples: + + format svcrdma: // many of the NFS/RDMA server dprintks + format readahead // some dprintks in the readahead cache + format nfsd:\040SETATTR // how to match a format with whitespace + +line + The given line number or range of line numbers is compared + against the line number of each dprintk() callsite. A single + line number matches the callsite line number exactly. A + range of line numbers matches any callsite between the first + and last line number inclusive. An empty first number means + the first line in the file, an empty line number means the + last number in the file. Examples: + + line 1603 // exactly line 1603 + line 1600-1605 // the six lines from line 1600 to line 1605 + line -1605 // the 1605 lines from line 1 to line 1605 + line 1600- // all lines from line 1600 to the end of the file + +The flags specification comprises a change operation followed +by one or more flag characters. The change operation is one +of the characters: + +- + remove the given flags + ++ + add the given flags + += + set the flags to the given flags + +The flags are: + +p + Causes a printk() message to be emitted to dmesg + +Note the regexp ^[-+=][scp]+$ matches a flags specification. +Note also that there is no convenient syntax to remove all +the flags at once, you need to use "-psc". + +Examples +======== + +// enable the message at line 1603 of file svcsock.c +nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > + /dynamic_debug/control + +// enable all the messages in file svcsock.c +nullarbor:~ # echo -n 'file svcsock.c +p' > + /dynamic_debug/control + +// enable all the messages in the NFS server module +nullarbor:~ # echo -n 'module nfsd +p' > + /dynamic_debug/control + +// enable all 12 messages in the function svc_process() +nullarbor:~ # echo -n 'func svc_process +p' > + /dynamic_debug/control + +// disable all 12 messages in the function svc_process() +nullarbor:~ # echo -n 'func svc_process -p' > + /dynamic_debug/control diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0dd1c04c7323..8fee0a13ac58 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -848,59 +848,69 @@ config BUILD_DOCSRC Say N if you are unsure. config DYNAMIC_DEBUG - bool "Enable dynamic printk() call support" + bool "Enable dynamic printk() support" default n depends on PRINTK + depends on DEBUG_FS select PRINTK_DEBUG help Compiles debug level messages into the kernel, which would not otherwise be available at runtime. These messages can then be - enabled/disabled on a per module basis. This mechanism implicitly - enables all pr_debug() and dev_dbg() calls. The impact of this - compile option is a larger kernel text size of about 2%. + enabled/disabled based on various levels of scope - per source file, + function, module, format string, and line number. This mechanism + implicitly enables all pr_debug() and dev_dbg() calls. The impact of + this compile option is a larger kernel text size of about 2%. Usage: - Dynamic debugging is controlled by the debugfs file, - dynamic_printk/modules. This file contains a list of the modules that - can be enabled. The format of the file is the module name, followed - by a set of flags that can be enabled. The first flag is always the - 'enabled' flag. For example: + Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, + which is contained in the 'debugfs' filesystem. Thus, the debugfs + filesystem must first be mounted before making use of this feature. + We refer the control file as: /dynamic_debug/ddebug. This + file contains a list of the debug statements that can be enabled. The + format for each line of the file is: - - . - . - . + filename:lineno [module]function flags format - : Name of the module in which the debug call resides - : whether the messages are enabled or not + filename : source file of the debug statement + lineno : line number of the debug statement + module : module that contains the debug statement + function : function that contains the debug statement + flags : 'p' means the line is turned 'on' for printing + format : the format used for the debug statement From a live system: - snd_hda_intel enabled=0 - fixup enabled=0 - driver enabled=0 + nullarbor:~ # cat /dynamic_debug/ddebug + # filename:lineno [module]function flags format + fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" + fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" + fs/aio.c:1770 [aio]sys_io_cancel - "calling\040cancel\012" - Enable a module: + Example usage: - $echo "set enabled=1 " > dynamic_printk/modules + // enable the message at line 1603 of file svcsock.c + nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > + /dynamic_debug/ddebug - Disable a module: + // enable all the messages in file svcsock.c + nullarbor:~ # echo -n 'file svcsock.c +p' > + /dynamic_debug/ddebug - $echo "set enabled=0 " > dynamic_printk/modules + // enable all the messages in the NFS server module + nullarbor:~ # echo -n 'module nfsd +p' > + /dynamic_debug/ddebug - Enable all modules: + // enable all 12 messages in the function svc_process() + nullarbor:~ # echo -n 'func svc_process +p' > + /dynamic_debug/ddebug - $echo "set enabled=1 all" > dynamic_printk/modules + // disable all 12 messages in the function svc_process() + nullarbor:~ # echo -n 'func svc_process -p' > + /dynamic_debug/ddebug - Disable all modules: - - $echo "set enabled=0 all" > dynamic_printk/modules - - Finally, passing "dynamic_printk" at the command line enables - debugging for all modules. This mode can be turned off via the above - disable command. + See Documentation/dynamic-debug-howto.txt for additional information. source "samples/Kconfig" -- cgit v1.2.3-59-g8ed1b From 9898abb3d23311fa227a7f46bf4e40fd2954057f Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Fri, 6 Feb 2009 12:54:26 +1100 Subject: Dynamic debug: allow simple quoting of words Allow simple quoting of words in the dynamic debug control language. This allows more natural specification when using the control language to match against printk formats, e.g #echo -n 'format "Setting node for non-present cpu" +p' > /mnt/debugfs/dynamic_debug/control instead of #echo -n 'format Setting\040node\040for\040non-present\040cpu +p' > /mnt/debugfs/dynamic_debug/control Adjust the dynamic debug documention to describe that and provide a new example. Adjust the existing examples in the documentation to reflect the current whitespace escaping behaviour when reading the control file. Fix some minor documentation trailing whitespace. Signed-off-by: Greg Banks Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/dynamic-debug-howto.txt | 20 +++++++++---- lib/dynamic_debug.c | 53 ++++++++++++++++++++++------------- 2 files changed, 47 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt index 68394825e86b..674c5663d346 100644 --- a/Documentation/dynamic-debug-howto.txt +++ b/Documentation/dynamic-debug-howto.txt @@ -49,10 +49,10 @@ via: nullarbor:~ # cat /dynamic_debug/control # filename:lineno [module]function flags format -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA\040Module\040Removed,\040deregister\040RPC\040RDMA\040transport\012" -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline\040\040\040\040\040\040\040:\040%d\012" -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth\040\040\040\040\040\040\040\040\040:\040%d\012" -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests\040\040\040\040\040:\040%d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA Module Removed, deregister RPC RDMA transport\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline : %d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth : %d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests : %d\012" ... @@ -72,7 +72,7 @@ you can view all the debug statement callsites with any non-default flags: nullarbor:~ # awk '$3 != "-"' /dynamic_debug/control # filename:lineno [module]function flags format -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process:\040st_sendto\040returned\040%d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012" Command Language Reference @@ -166,11 +166,15 @@ format entire format, only some part. Whitespace and other special characters can be escaped using C octal character escape \ooo notation, e.g. the space character is \040. + Alternatively, the string can be enclosed in double quote + characters (") or single quote characters ('). Examples: format svcrdma: // many of the NFS/RDMA server dprintks format readahead // some dprintks in the readahead cache - format nfsd:\040SETATTR // how to match a format with whitespace + format nfsd:\040SETATTR // one way to match a format with whitespace + format "nfsd: SETATTR" // a neater way to match a format with whitespace + format 'nfsd: SETATTR' // yet another way to match a format with whitespace line The given line number or range of line numbers is compared @@ -230,3 +234,7 @@ nullarbor:~ # echo -n 'func svc_process +p' > // disable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process -p' > /dynamic_debug/control + +// enable messages for NFS calls READ, READLINK, READDIR and READDIR+. +nullarbor:~ # echo -n 'format "nfsd: READ" +p' > + /dynamic_debug/control diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 9e123ae326bc..833139ce1e22 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -195,33 +195,46 @@ static void ddebug_change(const struct ddebug_query *query, printk(KERN_INFO "ddebug: no matches for query\n"); } -/* - * Wrapper around strsep() to collapse the multiple empty tokens - * that it returns when fed sequences of separator characters. - * Now, if we had strtok_r()... - */ -static inline char *nearly_strtok_r(char **p, const char *sep) -{ - char *r; - - while ((r = strsep(p, sep)) != NULL && *r == '\0') - ; - return r; -} - /* * Split the buffer `buf' into space-separated words. - * Return the number of such words or <0 on error. + * Handles simple " and ' quoting, i.e. without nested, + * embedded or escaped \". Return the number of words + * or <0 on error. */ static int ddebug_tokenize(char *buf, char *words[], int maxwords) { int nwords = 0; - while (nwords < maxwords && - (words[nwords] = nearly_strtok_r(&buf, " \t\r\n")) != NULL) - nwords++; - if (buf) - return -EINVAL; /* ran out of words[] before bytes */ + while (*buf) { + char *end; + + /* Skip leading whitespace */ + while (*buf && isspace(*buf)) + buf++; + if (!*buf) + break; /* oh, it was trailing whitespace */ + + /* Run `end' over a word, either whitespace separated or quoted */ + if (*buf == '"' || *buf == '\'') { + int quote = *buf++; + for (end = buf ; *end && *end != quote ; end++) + ; + if (!*end) + return -EINVAL; /* unclosed quote */ + } else { + for (end = buf ; *end && !isspace(*end) ; end++) + ; + BUG_ON(end == buf); + } + /* Here `buf' is the start of the word, `end' is one past the end */ + + if (nwords == maxwords) + return -EINVAL; /* ran out of words[] before bytes */ + if (*end) + *end++ = '\0'; /* terminate the word */ + words[nwords++] = buf; + buf = end; + } if (verbose) { int i; -- cgit v1.2.3-59-g8ed1b From 93c36ed8348934b462044d2d60ab345055318933 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Mar 2009 14:08:44 -0700 Subject: dma-debug: fix printk formats (i386) Fix printk format warnings in dma-debug: lib/dma-debug.c:645: warning: format '%016llx' expects type 'long long unsigned int', but argument 6 has type 'dma_addr_t' lib/dma-debug.c:662: warning: format '%016llx' expects type 'long long unsigned int', but argument 6 has type 'dma_addr_t' lib/dma-debug.c:676: warning: format '%016llx' expects type 'long long unsigned int', but argument 6 has type 'dma_addr_t' lib/dma-debug.c:686: warning: format '%016llx' expects type 'long long unsigned int', but argument 6 has type 'dma_addr_t' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 1a992089486c..d3da7edc034f 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -648,7 +648,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, err_printk(dev, NULL, "DMA-API: device driver tries " "to sync DMA memory it has not allocated " "[device address=0x%016llx] [size=%llu bytes]\n", - addr, size); + (unsigned long long)addr, size); goto out; } @@ -666,7 +666,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, "DMA memory with different direction " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", - addr, entry->size, + (unsigned long long)addr, entry->size, dir2name[entry->direction], dir2name[direction]); } @@ -680,7 +680,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, "device read-only DMA memory for cpu " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", - addr, entry->size, + (unsigned long long)addr, entry->size, dir2name[entry->direction], dir2name[direction]); @@ -690,7 +690,7 @@ static void check_sync(struct device *dev, dma_addr_t addr, "device write-only DMA memory to device " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", - addr, entry->size, + (unsigned long long)addr, entry->size, dir2name[entry->direction], dir2name[direction]); -- cgit v1.2.3-59-g8ed1b From 6a11f75b6a17b5d9ac5025f8d048382fd1f47377 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 31 Mar 2009 15:23:17 -0700 Subject: generic debug pagealloc CONFIG_DEBUG_PAGEALLOC is now supported by x86, powerpc, sparc64, and s390. This patch implements it for the rest of the architectures by filling the pages with poison byte patterns after free_pages() and verifying the poison patterns before alloc_pages(). This generic one cannot detect invalid page accesses immediately but invalid read access may cause invalid dereference by poisoned memory and invalid write access can be detected after a long delay. Signed-off-by: Akinobu Mita Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/mm/fault.c | 18 ------ arch/powerpc/Kconfig | 3 + arch/powerpc/Kconfig.debug | 1 + arch/s390/Kconfig | 3 + arch/s390/Kconfig.debug | 1 + arch/sparc/Kconfig | 3 + arch/sparc/Kconfig.debug | 3 +- arch/x86/Kconfig | 3 + arch/x86/Kconfig.debug | 1 + include/linux/mm_types.h | 4 ++ include/linux/page-debug-flags.h | 30 +++++++++ include/linux/poison.h | 3 + lib/Kconfig.debug | 1 + mm/Kconfig.debug | 17 ++++++ mm/Makefile | 1 + mm/debug-pagealloc.c | 129 +++++++++++++++++++++++++++++++++++++++ 16 files changed, 202 insertions(+), 19 deletions(-) create mode 100644 include/linux/page-debug-flags.h create mode 100644 mm/Kconfig.debug create mode 100644 mm/debug-pagealloc.c (limited to 'lib') diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index ce4e4296b954..62d4abbaa654 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -250,21 +250,3 @@ asmlinkage void do_bus_error(unsigned long addr, int write_access, dump_dtlb(); die("Bus Error", regs, SIGKILL); } - -/* - * This functionality is currently not possible to implement because - * we're using segmentation to ensure a fixed mapping of the kernel - * virtual address space. - * - * It would be possible to implement this, but it would require us to - * disable segmentation at startup and load the kernel mappings into - * the TLB like any other pages. There will be lots of trickery to - * avoid recursive invocation of the TLB miss handler, though... - */ -#ifdef CONFIG_DEBUG_PAGEALLOC -void kernel_map_pages(struct page *page, int numpages, int enable) -{ - -} -EXPORT_SYMBOL(kernel_map_pages); -#endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad6b1c084fe3..45192dce65c4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -228,6 +228,9 @@ config PPC_OF_PLATFORM_PCI depends on PPC64 # not supported on 32 bits yet default n +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 22091bbfdc9b..6aa0b5e087cd 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -30,6 +30,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL && !HIBERNATION + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2a8af5e16345..dcb667c4375a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,9 @@ config PGSTE config VIRT_CPU_ACCOUNTING def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + mainmenu "Linux Kernel Configuration" config S390 diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 4599fa06bd82..7e297a3cde34 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -9,6 +9,7 @@ source "lib/Kconfig.debug" config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a slowdown, but helps to find certain types of diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c3ea215334f6..cc12cd48bbc5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -124,6 +124,9 @@ config ARCH_NO_VIRT_TO_BUS config OF def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y if SPARC64 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index b8a15e271bfa..d001b42041a5 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -24,7 +24,8 @@ config STACK_DEBUG config DEBUG_PAGEALLOC bool "Debug page memory allocations" - depends on SPARC64 && DEBUG_KERNEL && !HIBERNATION + depends on DEBUG_KERNEL && !HIBERNATION + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45161b816313..748e50a1a152 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,9 @@ config AUDIT_ARCH config ARCH_SUPPORTS_OPTIMIZED_INLINING def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index fdb45df608b6..a345cb5447a8 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -75,6 +75,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d84feb7bdbf0..ddadb4defe00 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -174,6 +175,9 @@ struct vm_area_struct { #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS + unsigned long debug_flags; /* Use atomic bitops on this */ +#endif }; struct core_thread { diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h new file mode 100644 index 000000000000..b0638fd91e92 --- /dev/null +++ b/include/linux/page-debug-flags.h @@ -0,0 +1,30 @@ +#ifndef LINUX_PAGE_DEBUG_FLAGS_H +#define LINUX_PAGE_DEBUG_FLAGS_H + +/* + * page->debug_flags bits: + * + * PAGE_DEBUG_FLAG_POISON is set for poisoned pages. This is used to + * implement generic debug pagealloc feature. The pages are filled with + * poison patterns and set this flag after free_pages(). The poisoned + * pages are verified whether the patterns are not corrupted and clear + * the flag before alloc_pages(). + */ + +enum page_debug_flags { + PAGE_DEBUG_FLAG_POISON, /* Page is poisoned */ +}; + +/* + * Ensure that CONFIG_WANT_PAGE_DEBUG_FLAGS reliably + * gets turned off when no debug features are enabling it! + */ + +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS +#if !defined(CONFIG_PAGE_POISONING) \ +/* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */ +#error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features! +#endif +#endif /* CONFIG_WANT_PAGE_DEBUG_FLAGS */ + +#endif /* LINUX_PAGE_DEBUG_FLAGS_H */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 9f31683728fd..6729f7dcd60e 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -17,6 +17,9 @@ */ #define TIMER_ENTRY_STATIC ((void *) 0x74737461) +/********** mm/debug-pagealloc.c **********/ +#define PAGE_POISON 0xaa + /********** mm/slab.c **********/ /* * Magic nums for obj red zoning. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 58bfe7e8faba..9638d99644af 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -796,6 +796,7 @@ config SYSCTL_SYSCALL_CHECK to properly maintain and use. This enables checks that help you to keep things correct. +source mm/Kconfig.debug source kernel/trace/Kconfig config PROVIDE_OHCI1394_DMA_INIT diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug new file mode 100644 index 000000000000..c8d62d49a44e --- /dev/null +++ b/mm/Kconfig.debug @@ -0,0 +1,17 @@ +config WANT_PAGE_DEBUG_FLAGS + bool + +config PAGE_POISONING + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on !HIBERNATION + select DEBUG_PAGEALLOC + select WANT_PAGE_DEBUG_FLAGS + help + Fill the pages with poison patterns after free_pages() and verify + the patterns before alloc_pages(). This results in a large slowdown, + but helps to find certain types of memory corruptions. + + This option cannot enalbe with hibernation. Otherwise, it will get + wrong messages for memory corruption because the free pages are not + saved to the suspend image. diff --git a/mm/Makefile b/mm/Makefile index 818569b68f46..ec73c68b6015 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o +obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_FAILSLAB) += failslab.o diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c new file mode 100644 index 000000000000..a1e3324de2b5 --- /dev/null +++ b/mm/debug-pagealloc.c @@ -0,0 +1,129 @@ +#include +#include +#include +#include + +static inline void set_page_poison(struct page *page) +{ + __set_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline void clear_page_poison(struct page *page) +{ + __clear_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline bool page_poison(struct page *page) +{ + return test_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static void poison_highpage(struct page *page) +{ + /* + * Page poisoning for highmem pages is not implemented. + * + * This can be called from interrupt contexts. + * So we need to create a new kmap_atomic slot for this + * application and it will need interrupt protection. + */ +} + +static void poison_page(struct page *page) +{ + void *addr; + + if (PageHighMem(page)) { + poison_highpage(page); + return; + } + set_page_poison(page); + addr = page_address(page); + memset(addr, PAGE_POISON, PAGE_SIZE); +} + +static void poison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + poison_page(page + i); +} + +static bool single_bit_flip(unsigned char a, unsigned char b) +{ + unsigned char error = a ^ b; + + return error && !(error & (error - 1)); +} + +static void check_poison_mem(unsigned char *mem, size_t bytes) +{ + unsigned char *start; + unsigned char *end; + + for (start = mem; start < mem + bytes; start++) { + if (*start != PAGE_POISON) + break; + } + if (start == mem + bytes) + return; + + for (end = mem + bytes - 1; end > start; end--) { + if (*end != PAGE_POISON) + break; + } + + if (!printk_ratelimit()) + return; + else if (start == end && single_bit_flip(*start, PAGE_POISON)) + printk(KERN_ERR "pagealloc: single bit error\n"); + else + printk(KERN_ERR "pagealloc: memory corruption\n"); + + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, + end - start + 1, 1); + dump_stack(); +} + +static void unpoison_highpage(struct page *page) +{ + /* + * See comment in poison_highpage(). + * Highmem pages should not be poisoned for now + */ + BUG_ON(page_poison(page)); +} + +static void unpoison_page(struct page *page) +{ + if (PageHighMem(page)) { + unpoison_highpage(page); + return; + } + if (page_poison(page)) { + void *addr = page_address(page); + + check_poison_mem(addr, PAGE_SIZE); + clear_page_poison(page); + } +} + +static void unpoison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + unpoison_page(page + i); +} + +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (!debug_pagealloc_enabled) + return; + + if (enable) + unpoison_pages(page, numpages); + else + poison_pages(page, numpages); +} -- cgit v1.2.3-59-g8ed1b From 55a63998b8967615a15e2211ba0ff3a84a565824 Mon Sep 17 00:00:00 2001 From: Wolfram Strepp Date: Tue, 31 Mar 2009 15:23:45 -0700 Subject: lib/rbtree.c: optimize rb_erase() Tfour 4 redundant if-conditions in function __rb_erase_color() in lib/rbtree.c are removed. In pseudo-source-code, the structure of the code is as follows: if ((!A || B) && (!C || D)) { . . . } else { if (!C || D) {//if this is true, it implies: (A == true) && (B == false) if (A) {//hence this always evaluates to 'true'... . } . //at this point, C always becomes true, because of: __rb_rotate_right/left(); //and: other = parent->rb_right/left; } . . if (C) {//...and this too ! . } } Signed-off-by: Wolfram Strepp Acked-by: Peter Zijlstra Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/rbtree.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/rbtree.c b/lib/rbtree.c index 9956b99649f0..f653659e0bc1 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -163,17 +163,14 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, { if (!other->rb_right || rb_is_black(other->rb_right)) { - struct rb_node *o_left; - if ((o_left = other->rb_left)) - rb_set_black(o_left); + rb_set_black(other->rb_left); rb_set_red(other); __rb_rotate_right(other, root); other = parent->rb_right; } rb_set_color(other, rb_color(parent)); rb_set_black(parent); - if (other->rb_right) - rb_set_black(other->rb_right); + rb_set_black(other->rb_right); __rb_rotate_left(parent, root); node = root->rb_node; break; @@ -200,17 +197,14 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, { if (!other->rb_left || rb_is_black(other->rb_left)) { - register struct rb_node *o_right; - if ((o_right = other->rb_right)) - rb_set_black(o_right); + rb_set_black(other->rb_right); rb_set_red(other); __rb_rotate_left(other, root); other = parent->rb_left; } rb_set_color(other, rb_color(parent)); rb_set_black(parent); - if (other->rb_left) - rb_set_black(other->rb_left); + rb_set_black(other->rb_left); __rb_rotate_right(parent, root); node = root->rb_node; break; -- cgit v1.2.3-59-g8ed1b From 38460b48d06440de46b34cb778bd6c4855030754 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 2 Apr 2009 16:57:25 -0700 Subject: cgroup: CSS ID support Patch for Per-CSS(Cgroup Subsys State) ID and private hierarchy code. This patch attaches unique ID to each css and provides following. - css_lookup(subsys, id) returns pointer to struct cgroup_subysys_state of id. - css_get_next(subsys, id, rootid, depth, foundid) returns the next css under "root" by scanning When cgroup_subsys->use_id is set, an id for css is maintained. The cgroup framework only parepares - css_id of root css for subsys - id is automatically attached at creation of css. - id is *not* freed automatically. Because the cgroup framework don't know lifetime of cgroup_subsys_state. free_css_id() function is provided. This must be called by subsys. There are several reasons to develop this. - Saving space .... For example, memcg's swap_cgroup is array of pointers to cgroup. But it is not necessary to be very fast. By replacing pointers(8bytes per ent) to ID (2byes per ent), we can reduce much amount of memory usage. - Scanning without lock. CSS_ID provides "scan id under this ROOT" function. By this, scanning css under root can be written without locks. ex) do { rcu_read_lock(); next = cgroup_get_next(subsys, id, root, &found); /* check sanity of next here */ css_tryget(); rcu_read_unlock(); id = found + 1 } while(...) Characteristics: - Each css has unique ID under subsys. - Lifetime of ID is controlled by subsys. - css ID contains "ID" and "Depth in hierarchy" and stack of hierarchy - Allowed ID is 1-65535, ID 0 is UNUSED ID. Design Choices: - scan-by-ID v.s. scan-by-tree-walk. As /proc's pid scan does, scan-by-ID is robust when scanning is done by following kind of routine. scan -> rest a while(release a lock) -> conitunue from interrupted memcg's hierarchical reclaim does this. - When subsys->use_id is set, # of css in the system is limited to 65535. [bharata@linux.vnet.ibm.com: remove rcu_read_lock() from css_get_next()] Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Paul Menage Cc: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Bharata B Rao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 50 +++++++++ include/linux/idr.h | 1 + kernel/cgroup.c | 286 ++++++++++++++++++++++++++++++++++++++++++++++++- lib/idr.c | 46 ++++++++ 4 files changed, 382 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 788c4964c142..9a23bb098205 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -22,6 +23,7 @@ struct cgroupfs_root; struct cgroup_subsys; struct inode; struct cgroup; +struct css_id; extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -63,6 +65,8 @@ struct cgroup_subsys_state { atomic_t refcnt; unsigned long flags; + /* ID for this css, if possible */ + struct css_id *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -373,6 +377,11 @@ struct cgroup_subsys { int active; int disabled; int early_init; + /* + * True if this subsys uses ID. ID is not available before cgroup_init() + * (not available in early_init time.) + */ + bool use_id; #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; @@ -395,6 +404,9 @@ struct cgroup_subsys { */ struct cgroupfs_root *root; struct list_head sibling; + /* used when use_id == true */ + struct idr idr; + spinlock_t id_lock; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; @@ -450,6 +462,44 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); +/* + * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works + * if cgroup_subsys.use_id == true. It can be used for looking up and scanning. + * CSS ID is assigned at cgroup allocation (create) automatically + * and removed when subsys calls free_css_id() function. This is because + * the lifetime of cgroup_subsys_state is subsys's matter. + * + * Looking up and scanning function should be called under rcu_read_lock(). + * Taking cgroup_mutex()/hierarchy_mutex() is not necessary for following calls. + * But the css returned by this routine can be "not populated yet" or "being + * destroyed". The caller should check css and cgroup's status. + */ + +/* + * Typically Called at ->destroy(), or somewhere the subsys frees + * cgroup_subsys_state. + */ +void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css); + +/* Find a cgroup_subsys_state which has given ID */ + +struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id); + +/* + * Get a cgroup whose id is greater than or equal to id under tree of root. + * Returning a cgroup_subsys_state or NULL. + */ +struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id, + struct cgroup_subsys_state *root, int *foundid); + +/* Returns true if root is ancestor of cg */ +bool css_is_ancestor(struct cgroup_subsys_state *cg, + struct cgroup_subsys_state *root); + +/* Get id and depth of css */ +unsigned short css_id(struct cgroup_subsys_state *css); +unsigned short css_depth(struct cgroup_subsys_state *css); + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } diff --git a/include/linux/idr.h b/include/linux/idr.h index dd846df8cd32..e968db71e33a 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -106,6 +106,7 @@ int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); +void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); void idr_remove_all(struct idr *idp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 27792bcb0758..d3c521137425 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -94,7 +94,6 @@ struct cgroupfs_root { char release_agent_path[PATH_MAX]; }; - /* * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the * subsystems that are otherwise unattached - it never has more than a @@ -102,6 +101,39 @@ struct cgroupfs_root { */ static struct cgroupfs_root rootnode; +/* + * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when + * cgroup_subsys->use_id != 0. + */ +#define CSS_ID_MAX (65535) +struct css_id { + /* + * The css to which this ID points. This pointer is set to valid value + * after cgroup is populated. If cgroup is removed, this will be NULL. + * This pointer is expected to be RCU-safe because destroy() + * is called after synchronize_rcu(). But for safe use, css_is_removed() + * css_tryget() should be used for avoiding race. + */ + struct cgroup_subsys_state *css; + /* + * ID of this css. + */ + unsigned short id; + /* + * Depth in hierarchy which this ID belongs to. + */ + unsigned short depth; + /* + * ID is freed by RCU. (and lookup routine is RCU safe.) + */ + struct rcu_head rcu_head; + /* + * Hierarchy of CSS ID belongs to. + */ + unsigned short stack[0]; /* Array of Length (depth+1) */ +}; + + /* The list of hierarchy roots */ static LIST_HEAD(roots); @@ -185,6 +217,8 @@ struct cg_cgroup_link { static struct css_set init_css_set; static struct cg_cgroup_link init_css_set_link; +static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); + /* css_set_lock protects the list of css_set objects, and the * chain of tasks off each css_set. Nests outside task->alloc_lock * due to cgroup_iter_start() */ @@ -567,6 +601,9 @@ static struct backing_dev_info cgroup_backing_dev_info = { .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; +static int alloc_css_id(struct cgroup_subsys *ss, + struct cgroup *parent, struct cgroup *child); + static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) { struct inode *inode = new_inode(sb); @@ -2327,6 +2364,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp) if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) return err; } + /* This cgroup is ready now */ + for_each_subsys(cgrp->root, ss) { + struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; + /* + * Update id->css pointer and make this css visible from + * CSS ID functions. This pointer will be dereferened + * from RCU-read-side without locks. + */ + if (css->id) + rcu_assign_pointer(css->id->css, css); + } return 0; } @@ -2338,6 +2386,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, css->cgroup = cgrp; atomic_set(&css->refcnt, 1); css->flags = 0; + css->id = NULL; if (cgrp == dummytop) set_bit(CSS_ROOT, &css->flags); BUG_ON(cgrp->subsys[ss->subsys_id]); @@ -2413,6 +2462,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } init_cgroup_css(css, ss, cgrp); + if (ss->use_id) + if (alloc_css_id(ss, parent, cgrp)) + goto err_destroy; + /* At error, ->destroy() callback has to free assigned ID. */ } cgroup_lock_hierarchy(root); @@ -2708,6 +2761,8 @@ int __init cgroup_init(void) struct cgroup_subsys *ss = subsys[i]; if (!ss->early_init) cgroup_init_subsys(ss); + if (ss->use_id) + cgroup_subsys_init_idr(ss); } /* Add init_css_set to the hash table */ @@ -3242,3 +3297,232 @@ static int __init cgroup_disable(char *str) return 1; } __setup("cgroup_disable=", cgroup_disable); + +/* + * Functons for CSS ID. + */ + +/* + *To get ID other than 0, this should be called when !cgroup_is_removed(). + */ +unsigned short css_id(struct cgroup_subsys_state *css) +{ + struct css_id *cssid = rcu_dereference(css->id); + + if (cssid) + return cssid->id; + return 0; +} + +unsigned short css_depth(struct cgroup_subsys_state *css) +{ + struct css_id *cssid = rcu_dereference(css->id); + + if (cssid) + return cssid->depth; + return 0; +} + +bool css_is_ancestor(struct cgroup_subsys_state *child, + struct cgroup_subsys_state *root) +{ + struct css_id *child_id = rcu_dereference(child->id); + struct css_id *root_id = rcu_dereference(root->id); + + if (!child_id || !root_id || (child_id->depth < root_id->depth)) + return false; + return child_id->stack[root_id->depth] == root_id->id; +} + +static void __free_css_id_cb(struct rcu_head *head) +{ + struct css_id *id; + + id = container_of(head, struct css_id, rcu_head); + kfree(id); +} + +void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) +{ + struct css_id *id = css->id; + /* When this is called before css_id initialization, id can be NULL */ + if (!id) + return; + + BUG_ON(!ss->use_id); + + rcu_assign_pointer(id->css, NULL); + rcu_assign_pointer(css->id, NULL); + spin_lock(&ss->id_lock); + idr_remove(&ss->idr, id->id); + spin_unlock(&ss->id_lock); + call_rcu(&id->rcu_head, __free_css_id_cb); +} + +/* + * This is called by init or create(). Then, calls to this function are + * always serialized (By cgroup_mutex() at create()). + */ + +static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) +{ + struct css_id *newid; + int myid, error, size; + + BUG_ON(!ss->use_id); + + size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1); + newid = kzalloc(size, GFP_KERNEL); + if (!newid) + return ERR_PTR(-ENOMEM); + /* get id */ + if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) { + error = -ENOMEM; + goto err_out; + } + spin_lock(&ss->id_lock); + /* Don't use 0. allocates an ID of 1-65535 */ + error = idr_get_new_above(&ss->idr, newid, 1, &myid); + spin_unlock(&ss->id_lock); + + /* Returns error when there are no free spaces for new ID.*/ + if (error) { + error = -ENOSPC; + goto err_out; + } + if (myid > CSS_ID_MAX) + goto remove_idr; + + newid->id = myid; + newid->depth = depth; + return newid; +remove_idr: + error = -ENOSPC; + spin_lock(&ss->id_lock); + idr_remove(&ss->idr, myid); + spin_unlock(&ss->id_lock); +err_out: + kfree(newid); + return ERR_PTR(error); + +} + +static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss) +{ + struct css_id *newid; + struct cgroup_subsys_state *rootcss; + + spin_lock_init(&ss->id_lock); + idr_init(&ss->idr); + + rootcss = init_css_set.subsys[ss->subsys_id]; + newid = get_new_cssid(ss, 0); + if (IS_ERR(newid)) + return PTR_ERR(newid); + + newid->stack[0] = newid->id; + newid->css = rootcss; + rootcss->id = newid; + return 0; +} + +static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, + struct cgroup *child) +{ + int subsys_id, i, depth = 0; + struct cgroup_subsys_state *parent_css, *child_css; + struct css_id *child_id, *parent_id = NULL; + + subsys_id = ss->subsys_id; + parent_css = parent->subsys[subsys_id]; + child_css = child->subsys[subsys_id]; + depth = css_depth(parent_css) + 1; + parent_id = parent_css->id; + + child_id = get_new_cssid(ss, depth); + if (IS_ERR(child_id)) + return PTR_ERR(child_id); + + for (i = 0; i < depth; i++) + child_id->stack[i] = parent_id->stack[i]; + child_id->stack[depth] = child_id->id; + /* + * child_id->css pointer will be set after this cgroup is available + * see cgroup_populate_dir() + */ + rcu_assign_pointer(child_css->id, child_id); + + return 0; +} + +/** + * css_lookup - lookup css by id + * @ss: cgroup subsys to be looked into. + * @id: the id + * + * Returns pointer to cgroup_subsys_state if there is valid one with id. + * NULL if not. Should be called under rcu_read_lock() + */ +struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) +{ + struct css_id *cssid = NULL; + + BUG_ON(!ss->use_id); + cssid = idr_find(&ss->idr, id); + + if (unlikely(!cssid)) + return NULL; + + return rcu_dereference(cssid->css); +} + +/** + * css_get_next - lookup next cgroup under specified hierarchy. + * @ss: pointer to subsystem + * @id: current position of iteration. + * @root: pointer to css. search tree under this. + * @foundid: position of found object. + * + * Search next css under the specified hierarchy of rootid. Calling under + * rcu_read_lock() is necessary. Returns NULL if it reaches the end. + */ +struct cgroup_subsys_state * +css_get_next(struct cgroup_subsys *ss, int id, + struct cgroup_subsys_state *root, int *foundid) +{ + struct cgroup_subsys_state *ret = NULL; + struct css_id *tmp; + int tmpid; + int rootid = css_id(root); + int depth = css_depth(root); + + if (!rootid) + return NULL; + + BUG_ON(!ss->use_id); + /* fill start point for scan */ + tmpid = id; + while (1) { + /* + * scan next entry from bitmap(tree), tmpid is updated after + * idr_get_next(). + */ + spin_lock(&ss->id_lock); + tmp = idr_get_next(&ss->idr, &tmpid); + spin_unlock(&ss->id_lock); + + if (!tmp) + break; + if (tmp->depth >= depth && tmp->stack[depth] == rootid) { + ret = rcu_dereference(tmp->css); + if (ret) { + *foundid = tmpid; + break; + } + } + /* continue to scan from next id */ + tmpid = tmpid + 1; + } + return ret; +} + diff --git a/lib/idr.c b/lib/idr.c index dab4bca86f5d..80ca9aca038b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -578,6 +578,52 @@ int idr_for_each(struct idr *idp, } EXPORT_SYMBOL(idr_for_each); +/** + * idr_get_next - lookup next object of id to given id. + * @idp: idr handle + * @id: pointer to lookup key + * + * Returns pointer to registered object with id, which is next number to + * given id. + */ + +void *idr_get_next(struct idr *idp, int *nextidp) +{ + struct idr_layer *p, *pa[MAX_LEVEL]; + struct idr_layer **paa = &pa[0]; + int id = *nextidp; + int n, max; + + /* find first ent */ + n = idp->layers * IDR_BITS; + max = 1 << n; + p = rcu_dereference(idp->top); + if (!p) + return NULL; + + while (id < max) { + while (n > 0 && p) { + n -= IDR_BITS; + *paa++ = p; + p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); + } + + if (p) { + *nextidp = id; + return p; + } + + id += 1 << n; + while (n < fls(id)) { + n += IDR_BITS; + p = *--paa; + } + } + return NULL; +} + + + /** * idr_replace - replace pointer for given id * @idp: idr handle -- cgit v1.2.3-59-g8ed1b From 4f032ac4122a77dbabf7a24b2739b2790448180f Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 2 Apr 2009 16:59:48 -0700 Subject: cpumask: fix slab corruption caused by alloc_cpumask_var_node() Fix slab corruption caused by alloc_cpumask_var_node() overwriting the tail end of an off-stack cpumask. The function zeros out cpumask bits beyond the last possible cpu. The starting point for zeroing should be the beginning of the mask offset by a byte count derived from the number of possible cpus. The offset was calculated in bits instead of bytes. This resulted in overwriting the end of the cpumask. Signed-off-by: Jack Steiner Acked-by: Mike Travis Acked-by: Ingo Molnar Cc: Rusty Russell Cc: Stephen Rothwell Cc: [2.6.29.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/cpumask.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index 3389e2440da0..1f71b97de0f9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -109,10 +109,10 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) #endif /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */ if (*mask) { + unsigned char *ptr = (unsigned char *)cpumask_bits(*mask); unsigned int tail; tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long); - memset(cpumask_bits(*mask) + cpumask_size() - tail, - 0, tail); + memset(ptr + cpumask_size() - tail, 0, tail); } return *mask != NULL; -- cgit v1.2.3-59-g8ed1b From 079effb6933f34b9b1b67b08bd4fd7fb672d16ef Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 24 Mar 2009 11:13:50 +0200 Subject: kmemtrace, kbuild: fix slab.h dependency problem in lib/decompress_inflate.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: fix build lib/decompress_inflate.c depends on slab.h without including it: CC lib/decompress_inflate.o lib/decompress_inflate.c: In function ‘gunzip’: lib/decompress_inflate.c:45: error: implicit declaration of function ‘kmalloc’ lib/decompress_inflate.c:45: warning: assignment makes pointer from integer without a cast lib/decompress_inflate.c:57: warning: assignment makes pointer from integer without a cast lib/decompress_inflate.c:65: warning: assignment makes pointer from integer without a cast lib/decompress_inflate.c:71: warning: assignment makes pointer from integer without a cast lib/decompress_inflate.c:154: error: implicit declaration of function ‘kfree’ make[1]: *** [lib/decompress_inflate.o] Error 1 make: *** [lib/] Error 2 It gets included implicitly currently - but this will not be the case with upcoming kmemtrace changes. Signed-off-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu LKML-Reference: <1237886030.25315.47.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- lib/decompress_inflate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 839a329b4fc4..e36b296fc9f8 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -23,6 +23,7 @@ #endif /* STATIC */ #include +#include #define INBUF_LEN (16*1024) -- cgit v1.2.3-59-g8ed1b From ba56617ef37f2be31f46f9533057e173b778602e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 24 Mar 2009 11:13:52 +0200 Subject: kmemtrace, kbuild: fix slab.h dependency problem in lib/decompress_bunzip2.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup lib/decompress_bunzip2.c depends on slab.h without including it: CC lib/decompress_bunzip2.o lib/decompress_bunzip2.c: In function ‘start_bunzip’: lib/decompress_bunzip2.c:636: error: implicit declaration of function ‘kmalloc’ lib/decompress_bunzip2.c:636: warning: assignment makes pointer from integer without a cast lib/decompress_bunzip2.c: In function ‘bunzip2’: lib/decompress_bunzip2.c:682: warning: assignment makes pointer from integer without a cast lib/decompress_bunzip2.c:693: warning: assignment makes pointer from integer without a cast lib/decompress_bunzip2.c:726: error: implicit declaration of function ‘kfree’ make[1]: *** [lib/decompress_bunzip2.o] Error 1 make: *** [lib/] Error 2 It gets included implicitly currently - but this will not be the case with upcoming kmemtrace changes. Signed-off-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu LKML-Reference: <1237886032.25315.48.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- lib/decompress_bunzip2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 5d3ddb5fcfd9..708e2a86d87b 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -50,6 +50,7 @@ #endif /* !STATIC */ #include +#include #ifndef INT_MAX #define INT_MAX 0x7fffffff -- cgit v1.2.3-59-g8ed1b From e65a1b7c390a72fbcefb2639e255fb7f145538d3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 24 Mar 2009 11:22:01 +0200 Subject: kmemtrace, kbuild: fix slab.h dependency problem in lib/decompress_unlzma.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup lib/decompress_unlzma.c depends on slab.h without including it: CC lib/decompress_unlzma.o lib/decompress_unlzma.c: In function ‘rc_free’: lib/decompress_unlzma.c:122: error: implicit declaration of function ‘kfree’ lib/decompress_unlzma.c: In function ‘unlzma’: lib/decompress_unlzma.c:551: error: implicit declaration of function ‘kmalloc’ lib/decompress_unlzma.c:551: warning: assignment makes pointer from integer without a cast make[1]: *** [lib/decompress_unlzma.o] Error 1 make: *** [lib/] Error 2 It gets included implicitly currently - but this will not be the case with upcoming kmemtrace changes. Signed-off-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu LKML-Reference: <1237886521.25315.58.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- lib/decompress_unlzma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 546f2f4c157e..32123a1340e6 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -34,6 +34,7 @@ #endif /* STATIC */ #include +#include #define MIN(a, b) (((a) < (b)) ? (a) : (b)) -- cgit v1.2.3-59-g8ed1b