diff options
Diffstat (limited to '')
139 files changed, 16080 insertions, 4465 deletions
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 4c1966f7c77a..9b3c528bab92 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -4,6 +4,7 @@ #include <linux/compiler.h> #include <linux/types.h> +#include <linux/bitops.h> /* * Atomic operations that C can't guarantee us. Useful for @@ -69,4 +70,26 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) return cmpxchg(&(v)->counter, oldval, newval); } +static inline int test_and_set_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + long old; + + addr += BIT_WORD(nr); + + old = __sync_fetch_and_or(addr, mask); + return !!(old & mask); +} + +static inline int test_and_clear_bit(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + long old; + + addr += BIT_WORD(nr); + + old = __sync_fetch_and_and(addr, ~mask); + return !!(old & mask); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 5d2ab38965cc..9ab313e93555 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -18,7 +18,6 @@ #include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> -#include <asm-generic/bitops/find.h> #ifndef _TOOLS_LINUX_BITOPS_H_ #error only <linux/bitops.h> can be included directly diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h index 2f6ea28764a7..ab37a221b41a 100644 --- a/tools/include/asm-generic/bitops/atomic.h +++ b/tools/include/asm-generic/bitops/atomic.h @@ -5,14 +5,11 @@ #include <asm/types.h> #include <asm/bitsperlong.h> -static inline void set_bit(int nr, unsigned long *addr) -{ - addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG); -} - -static inline void clear_bit(int nr, unsigned long *addr) -{ - addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG)); -} +/* + * Just alias the test versions, all of the compiler built-in atomics "fetch", + * and optimizing compile-time constants on x86 isn't worth the complexity. + */ +#define set_bit test_and_set_bit +#define clear_bit test_and_clear_bit #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h deleted file mode 100644 index 16ed1982cb34..000000000000 --- a/tools/include/asm-generic/bitops/find.h +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ -#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ - -#ifndef find_next_bit -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number for the next set bit - * If no bits are set, returns @size. - */ -extern unsigned long find_next_bit(const unsigned long *addr, unsigned long - size, unsigned long offset); -#endif - -#ifndef find_next_and_bit -/** - * find_next_and_bit - find the next set bit in both memory regions - * @addr1: The first address to base the search on - * @addr2: The second address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number for the next set bit - * If no bits are set, returns @size. - */ -extern unsigned long find_next_and_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long size, - unsigned long offset); -#endif - -#ifndef find_next_zero_bit - -/** - * find_next_zero_bit - find the next cleared bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number of the next zero bit - * If no bits are zero, returns @size. - */ -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset); -#endif - -#ifndef find_first_bit - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum number of bits to search - * - * Returns the bit number of the first set bit. - * If no bits are set, returns @size. - */ -extern unsigned long find_first_bit(const unsigned long *addr, - unsigned long size); - -#endif /* find_first_bit */ - -#ifndef find_first_zero_bit - -/** - * find_first_zero_bit - find the first cleared bit in a memory region - * @addr: The address to start the search at - * @size: The maximum number of bits to search - * - * Returns the bit number of the first cleared bit. - * If no bits are zero, returns @size. - */ -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); -#endif - -#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/include/asm-generic/bitops/non-atomic.h b/tools/include/asm-generic/bitops/non-atomic.h index 7e10c4b50c5d..0c472a833408 100644 --- a/tools/include/asm-generic/bitops/non-atomic.h +++ b/tools/include/asm-generic/bitops/non-atomic.h @@ -2,10 +2,10 @@ #ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ -#include <asm/types.h> +#include <linux/bits.h> /** - * __set_bit - Set a bit in memory + * ___set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @@ -13,7 +13,8 @@ * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(int nr, volatile unsigned long *addr) +static __always_inline void +___set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -21,7 +22,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) *p |= mask; } -static inline void __clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void +___clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -30,7 +32,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) } /** - * __change_bit - Toggle a bit in memory + * ___change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * @@ -38,7 +40,8 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile unsigned long *addr) +static __always_inline void +___change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -47,7 +50,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) } /** - * __test_and_set_bit - Set a bit and return its old value + * ___test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * @@ -55,7 +58,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +static __always_inline bool +___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -66,7 +70,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) } /** - * __test_and_clear_bit - Clear a bit and return its old value + * ___test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * @@ -74,7 +78,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +static __always_inline bool +___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -85,8 +90,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, - volatile unsigned long *addr) +static __always_inline bool +___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -97,11 +102,12 @@ static inline int __test_and_change_bit(int nr, } /** - * test_bit - Determine whether a bit is set + * _test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ -static inline int test_bit(int nr, const volatile unsigned long *addr) +static __always_inline bool +_test_bit(unsigned long nr, const volatile unsigned long *addr) { return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } diff --git a/tools/include/asm-generic/bitsperlong.h b/tools/include/asm-generic/bitsperlong.h index 8f2283052333..2093d56ddd11 100644 --- a/tools/include/asm-generic/bitsperlong.h +++ b/tools/include/asm-generic/bitsperlong.h @@ -18,4 +18,7 @@ #define BITS_PER_LONG_LONG 64 #endif +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h index e4732d3c2998..de687009bfe5 100644 --- a/tools/include/asm-generic/hugetlb_encode.h +++ b/tools/include/asm-generic/hugetlb_encode.h @@ -20,15 +20,18 @@ #define HUGETLB_FLAG_ENCODE_SHIFT 26 #define HUGETLB_FLAG_ENCODE_MASK 0x3f -#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) -#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h new file mode 100644 index 000000000000..cdd2fd078027 --- /dev/null +++ b/tools/include/asm-generic/unaligned.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_UNALIGNED_H +#define __ASM_GENERIC_UNALIGNED_H + +/* + * This is the most generic implementation of unaligned accesses + * and should work almost anywhere. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) +#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) + +static inline u16 get_unaligned_le16(const void *p) +{ + return le16_to_cpu(__get_unaligned_t(__le16, p)); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpu(__get_unaligned_t(__le32, p)); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return le64_to_cpu(__get_unaligned_t(__le64, p)); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_t(__le16, cpu_to_le16(val), p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_t(__le32, cpu_to_le32(val), p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_t(__le64, cpu_to_le64(val), p); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return be16_to_cpu(__get_unaligned_t(__be16, p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return be32_to_cpu(__get_unaligned_t(__be32, p)); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return be64_to_cpu(__get_unaligned_t(__be64, p)); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_t(__be16, cpu_to_be16(val), p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_t(__be32, cpu_to_be32(val), p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_t(__be64, cpu_to_be64(val), p); +} + +static inline u32 __get_unaligned_be24(const u8 *p) +{ + return p[0] << 16 | p[1] << 8 | p[2]; +} + +static inline u32 get_unaligned_be24(const void *p) +{ + return __get_unaligned_be24(p); +} + +static inline u32 __get_unaligned_le24(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16; +} + +static inline u32 get_unaligned_le24(const void *p) +{ + return __get_unaligned_le24(p); +} + +static inline void __put_unaligned_be24(const u32 val, u8 *p) +{ + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; +} + +static inline void put_unaligned_be24(const u32 val, void *p) +{ + __put_unaligned_be24(val, p); +} + +static inline void __put_unaligned_le24(const u32 val, u8 *p) +{ + *p++ = val & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; +} + +static inline void put_unaligned_le24(const u32 val, void *p) +{ + __put_unaligned_le24(val, p); +} + +static inline void __put_unaligned_be48(const u64 val, u8 *p) +{ + *p++ = (val >> 40) & 0xff; + *p++ = (val >> 32) & 0xff; + *p++ = (val >> 24) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; +} + +static inline void put_unaligned_be48(const u64 val, void *p) +{ + __put_unaligned_be48(val, p); +} + +static inline u64 __get_unaligned_be48(const u8 *p) +{ + return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 | + p[3] << 16 | p[4] << 8 | p[5]; +} + +static inline u64 get_unaligned_be48(const void *p) +{ + return __get_unaligned_be48(p); +} +#pragma GCC diagnostic pop + +#endif /* __ASM_GENERIC_UNALIGNED_H */ diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h deleted file mode 100644 index b54bd860dff6..000000000000 --- a/tools/include/asm/alternative-asm.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H -#define _TOOLS_ASM_ALTERNATIVE_ASM_H - -/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ - -#define altinstruction_entry # -#define ALTERNATIVE_2 # - -#endif diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h new file mode 100644 index 000000000000..7ce02a223732 --- /dev/null +++ b/tools/include/asm/alternative.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H +#define _TOOLS_ASM_ALTERNATIVE_ASM_H + +/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ + +#define ALTERNATIVE # + +#endif diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h index dc696c151e1c..8d378c57cb01 100644 --- a/tools/include/asm/barrier.h +++ b/tools/include/asm/barrier.h @@ -24,8 +24,6 @@ #include "../../arch/ia64/include/asm/barrier.h" #elif defined(__xtensa__) #include "../../arch/xtensa/include/asm/barrier.h" -#elif defined(__nds32__) -#include "../../arch/nds32/include/asm/barrier.h" #else #include <asm-generic/barrier.h> #endif diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h new file mode 100644 index 000000000000..9ccb16074eb5 --- /dev/null +++ b/tools/include/io_uring/mini_liburing.h @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: MIT */ + +#include <linux/io_uring.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +struct io_sq_ring { + unsigned int *head; + unsigned int *tail; + unsigned int *ring_mask; + unsigned int *ring_entries; + unsigned int *flags; + unsigned int *array; +}; + +struct io_cq_ring { + unsigned int *head; + unsigned int *tail; + unsigned int *ring_mask; + unsigned int *ring_entries; + struct io_uring_cqe *cqes; +}; + +struct io_uring_sq { + unsigned int *khead; + unsigned int *ktail; + unsigned int *kring_mask; + unsigned int *kring_entries; + unsigned int *kflags; + unsigned int *kdropped; + unsigned int *array; + struct io_uring_sqe *sqes; + + unsigned int sqe_head; + unsigned int sqe_tail; + + size_t ring_sz; +}; + +struct io_uring_cq { + unsigned int *khead; + unsigned int *ktail; + unsigned int *kring_mask; + unsigned int *kring_entries; + unsigned int *koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + int ring_fd; +}; + +#if defined(__x86_64) || defined(__i386__) +#define read_barrier() __asm__ __volatile__("":::"memory") +#define write_barrier() __asm__ __volatile__("":::"memory") +#else +#define read_barrier() __sync_synchronize() +#define write_barrier() __sync_synchronize() +#endif + +static inline int io_uring_mmap(int fd, struct io_uring_params *p, + struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + size_t size; + void *ptr; + int ret; + + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int); + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (ptr == MAP_FAILED) + return -errno; + sq->khead = ptr + p->sq_off.head; + sq->ktail = ptr + p->sq_off.tail; + sq->kring_mask = ptr + p->sq_off.ring_mask; + sq->kring_entries = ptr + p->sq_off.ring_entries; + sq->kflags = ptr + p->sq_off.flags; + sq->kdropped = ptr + p->sq_off.dropped; + sq->array = ptr + p->sq_off.array; + + size = p->sq_entries * sizeof(struct io_uring_sqe); + sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { + ret = -errno; +err: + munmap(sq->khead, sq->ring_sz); + return ret; + } + + cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (ptr == MAP_FAILED) { + ret = -errno; + munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); + goto err; + } + cq->khead = ptr + p->cq_off.head; + cq->ktail = ptr + p->cq_off.tail; + cq->kring_mask = ptr + p->cq_off.ring_mask; + cq->kring_entries = ptr + p->cq_off.ring_entries; + cq->koverflow = ptr + p->cq_off.overflow; + cq->cqes = ptr + p->cq_off.cqes; + return 0; +} + +static inline int io_uring_setup(unsigned int entries, + struct io_uring_params *p) +{ + return syscall(__NR_io_uring_setup, entries, p); +} + +static inline int io_uring_enter(int fd, unsigned int to_submit, + unsigned int min_complete, + unsigned int flags, sigset_t *sig) +{ + return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, + flags, sig, _NSIG / 8); +} + +static inline int io_uring_queue_init(unsigned int entries, + struct io_uring *ring, + unsigned int flags) +{ + struct io_uring_params p; + int fd, ret; + + memset(ring, 0, sizeof(*ring)); + memset(&p, 0, sizeof(p)); + p.flags = flags; + + fd = io_uring_setup(entries, &p); + if (fd < 0) + return fd; + ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); + if (!ret) + ring->ring_fd = fd; + else + close(fd); + return ret; +} + +/* Get a sqe */ +static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries) + return NULL; + return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask]; +} + +static inline int io_uring_wait_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + struct io_uring_cq *cq = &ring->cq; + const unsigned int mask = *cq->kring_mask; + unsigned int head = *cq->khead; + int ret; + + *cqe_ptr = NULL; + do { + read_barrier(); + if (head != *cq->ktail) { + *cqe_ptr = &cq->cqes[head & mask]; + break; + } + ret = io_uring_enter(ring->ring_fd, 0, 1, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) + return -errno; + } while (1); + + return 0; +} + +static inline int io_uring_submit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + const unsigned int mask = *sq->kring_mask; + unsigned int ktail, submitted, to_submit; + int ret; + + read_barrier(); + if (*sq->khead != *sq->ktail) { + submitted = *sq->kring_entries; + goto submit; + } + if (sq->sqe_head == sq->sqe_tail) + return 0; + + ktail = *sq->ktail; + to_submit = sq->sqe_tail - sq->sqe_head; + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + if (!submitted) + return 0; + + if (*sq->ktail != ktail) { + write_barrier(); + *sq->ktail = ktail; + write_barrier(); + } +submit: + ret = io_uring_enter(ring->ring_fd, submitted, 0, + IORING_ENTER_GETEVENTS, NULL); + return ret < 0 ? -errno : ret; +} + +static inline void io_uring_queue_exit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); + munmap(sq->khead, sq->ring_sz); + close(ring->ring_fd); +} + +/* Prepare and send the SQE */ +static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op, + int sockfd, + int level, int optname, + const void *optval, + int optlen) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = (__u8)IORING_OP_URING_CMD; + sqe->fd = sockfd; + sqe->cmd_op = op; + + sqe->level = level; + sqe->optname = optname; + sqe->optval = (unsigned long long)optval; + sqe->optlen = optlen; +} + +static inline int io_uring_register_buffers(struct io_uring *ring, + const struct iovec *iovecs, + unsigned int nr_iovecs) +{ + int ret; + + ret = syscall(__NR_io_uring_register, ring->ring_fd, + IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); + return (ret < 0) ? -errno : ret; +} + +static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = (__u8)IORING_OP_SEND; + sqe->fd = sockfd; + sqe->addr = (unsigned long)buf; + sqe->len = len; + sqe->msg_flags = (__u32)flags; +} + +static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags, + unsigned int zc_flags) +{ + io_uring_prep_send(sqe, sockfd, buf, len, flags); + sqe->opcode = (__u8)IORING_OP_SEND_ZC; + sqe->ioprio = zc_flags; +} + +static inline void io_uring_cqe_seen(struct io_uring *ring) +{ + *(&ring->cq)->khead += 1; + write_barrier(); +} diff --git a/tools/include/linux/arm-smccc.h b/tools/include/linux/arm-smccc.h new file mode 100644 index 000000000000..63ce9bebccd3 --- /dev/null +++ b/tools/include/linux/arm-smccc.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015, Linaro Limited + */ +#ifndef __LINUX_ARM_SMCCC_H +#define __LINUX_ARM_SMCCC_H + +#include <linux/const.h> + +/* + * This file provides common defines for ARM SMC Calling Convention as + * specified in + * https://developer.arm.com/docs/den0028/latest + * + * This code is up-to-date with version DEN 0028 C + */ + +#define ARM_SMCCC_STD_CALL _AC(0,U) +#define ARM_SMCCC_FAST_CALL _AC(1,U) +#define ARM_SMCCC_TYPE_SHIFT 31 + +#define ARM_SMCCC_SMC_32 0 +#define ARM_SMCCC_SMC_64 1 +#define ARM_SMCCC_CALL_CONV_SHIFT 30 + +#define ARM_SMCCC_OWNER_MASK 0x3F +#define ARM_SMCCC_OWNER_SHIFT 24 + +#define ARM_SMCCC_FUNC_MASK 0xFFFF + +#define ARM_SMCCC_IS_FAST_CALL(smc_val) \ + ((smc_val) & (ARM_SMCCC_FAST_CALL << ARM_SMCCC_TYPE_SHIFT)) +#define ARM_SMCCC_IS_64(smc_val) \ + ((smc_val) & (ARM_SMCCC_SMC_64 << ARM_SMCCC_CALL_CONV_SHIFT)) +#define ARM_SMCCC_FUNC_NUM(smc_val) ((smc_val) & ARM_SMCCC_FUNC_MASK) +#define ARM_SMCCC_OWNER_NUM(smc_val) \ + (((smc_val) >> ARM_SMCCC_OWNER_SHIFT) & ARM_SMCCC_OWNER_MASK) + +#define ARM_SMCCC_CALL_VAL(type, calling_convention, owner, func_num) \ + (((type) << ARM_SMCCC_TYPE_SHIFT) | \ + ((calling_convention) << ARM_SMCCC_CALL_CONV_SHIFT) | \ + (((owner) & ARM_SMCCC_OWNER_MASK) << ARM_SMCCC_OWNER_SHIFT) | \ + ((func_num) & ARM_SMCCC_FUNC_MASK)) + +#define ARM_SMCCC_OWNER_ARCH 0 +#define ARM_SMCCC_OWNER_CPU 1 +#define ARM_SMCCC_OWNER_SIP 2 +#define ARM_SMCCC_OWNER_OEM 3 +#define ARM_SMCCC_OWNER_STANDARD 4 +#define ARM_SMCCC_OWNER_STANDARD_HYP 5 +#define ARM_SMCCC_OWNER_VENDOR_HYP 6 +#define ARM_SMCCC_OWNER_TRUSTED_APP 48 +#define ARM_SMCCC_OWNER_TRUSTED_APP_END 49 +#define ARM_SMCCC_OWNER_TRUSTED_OS 50 +#define ARM_SMCCC_OWNER_TRUSTED_OS_END 63 + +#define ARM_SMCCC_FUNC_QUERY_CALL_UID 0xff01 + +#define ARM_SMCCC_QUIRK_NONE 0 +#define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ + +#define ARM_SMCCC_VERSION_1_0 0x10000 +#define ARM_SMCCC_VERSION_1_1 0x10001 +#define ARM_SMCCC_VERSION_1_2 0x10002 +#define ARM_SMCCC_VERSION_1_3 0x10003 + +#define ARM_SMCCC_1_3_SVE_HINT 0x10000 + +#define ARM_SMCCC_VERSION_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0) + +#define ARM_SMCCC_ARCH_FEATURES_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 1) + +#define ARM_SMCCC_ARCH_SOC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 2) + +#define ARM_SMCCC_ARCH_WORKAROUND_1 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x8000) + +#define ARM_SMCCC_ARCH_WORKAROUND_2 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x7fff) + +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + +#define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_FUNC_QUERY_CALL_UID) + +/* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +/* KVM "vendor specific" services */ +#define ARM_SMCCC_KVM_FUNC_FEATURES 0 +#define ARM_SMCCC_KVM_FUNC_PTP 1 +#define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 +#define ARM_SMCCC_KVM_NUM_FUNCS 128 + +#define ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_FEATURES) + +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 + +/* + * ptp_kvm is a feature used for time sync between vm and host. + * ptp_kvm module in guest kernel will get service from host using + * this hypercall ID. + */ +#define ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_PTP) + +/* ptp_kvm counter type ID */ +#define KVM_PTP_VIRT_COUNTER 0 +#define KVM_PTP_PHYS_COUNTER 1 + +/* Paravirtualised time calls (defined by ARM DEN0057A) */ +#define ARM_SMCCC_HV_PV_TIME_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x20) + +#define ARM_SMCCC_HV_PV_TIME_ST \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x21) + +/* TRNG entropy source calls (defined by ARM DEN0098) */ +#define ARM_SMCCC_TRNG_VERSION \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x50) + +#define ARM_SMCCC_TRNG_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x51) + +#define ARM_SMCCC_TRNG_GET_UUID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x52) + +#define ARM_SMCCC_TRNG_RND32 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x53) + +#define ARM_SMCCC_TRNG_RND64 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x53) + +/* + * Return codes defined in ARM DEN 0070A + * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C + */ +#define SMCCC_RET_SUCCESS 0 +#define SMCCC_RET_NOT_SUPPORTED -1 +#define SMCCC_RET_NOT_REQUIRED -2 +#define SMCCC_RET_INVALID_PARAMETER -3 + +#endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 00a6c4ca562b..01907b33537e 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -4,6 +4,8 @@ #include <asm/atomic.h> +void atomic_long_set(atomic_long_t *v, long i); + /* atomic_cmpxchg_relaxed */ #ifndef atomic_cmpxchg_relaxed #define atomic_cmpxchg_relaxed atomic_cmpxchg diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h new file mode 100644 index 000000000000..6093fa6db260 --- /dev/null +++ b/tools/include/linux/bitfield.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name> + * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com> + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include <linux/build_bug.h> +#include <asm/byteorder.h> + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. + * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. + */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + +/** + * FIELD_FIT() - check if value fits in the field + * @_mask: shifted mask defining the field's length and position + * @_val: value to test against the field + * + * Return: true if @_val can fit inside @_mask, false if @_val is too big. + */ +#define FIELD_FIT(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ + !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. + */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +extern void __compiletime_error("value doesn't fit into mask") +__field_overflow(void); +extern void __compiletime_error("bad bitfield mask") +__bad_mask(void); +static __always_inline u64 field_multiplier(u64 field) +{ + if ((field | (field - 1)) & ((field | (field - 1)) + 1)) + __bad_mask(); + return field & -field; +} +static __always_inline u64 field_mask(u64 field) +{ + return field / field_multiplier(field); +} +#define field_max(field) ((typeof(field))field_mask(field)) +#define ____MAKE_OP(type,base,to,from) \ +static __always_inline __##type type##_encode_bits(base v, base field) \ +{ \ + if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ + __field_overflow(); \ + return to((v & field_mask(field)) * field_multiplier(field)); \ +} \ +static __always_inline __##type type##_replace_bits(__##type old, \ + base val, base field) \ +{ \ + return (old & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline void type##p_replace_bits(__##type *p, \ + base val, base field) \ +{ \ + *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline base type##_get_bits(__##type v, base field) \ +{ \ + return (from(v) & field)/field_multiplier(field); \ +} +#define __MAKE_OP(size) \ + ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ + ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ + ____MAKE_OP(u##size,u##size,,) +____MAKE_OP(u8,u8,,) +__MAKE_OP(16) +__MAKE_OP(32) +__MAKE_OP(64) +#undef __MAKE_OP +#undef ____MAKE_OP + +#endif diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 477a1cae513f..f3566ea0f932 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -1,36 +1,31 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_BITOPS_H -#define _PERF_BITOPS_H +#ifndef _TOOLS_LINUX_BITMAP_H +#define _TOOLS_LINUX_BITMAP_H #include <string.h> #include <linux/bitops.h> +#include <linux/find.h> #include <stdlib.h> #include <linux/kernel.h> #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] -int __bitmap_weight(const unsigned long *bitmap, int bits); +unsigned int __bitmap_weight(const unsigned long *bitmap, int bits); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); -int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); -int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits); +bool __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits); void bitmap_clear(unsigned long *map, unsigned int start, int len); +bool __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) -#define BITMAP_LAST_WORD_MASK(nbits) \ -( \ - ((nbits) % BITS_PER_LONG) ? \ - (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ -) - -#define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) - -static inline void bitmap_zero(unsigned long *dst, int nbits) +static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = 0UL; @@ -50,7 +45,7 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); } -static inline int bitmap_empty(const unsigned long *src, unsigned nbits) +static inline bool bitmap_empty(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -58,7 +53,7 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline int bitmap_full(const unsigned long *src, unsigned int nbits) +static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); @@ -66,7 +61,7 @@ static inline int bitmap_full(const unsigned long *src, unsigned int nbits) return find_first_zero_bit(src, nbits) == nbits; } -static inline int bitmap_weight(const unsigned long *src, int nbits) +static inline unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -74,7 +69,7 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -83,44 +78,10 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, } /** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - */ -static inline int test_and_set_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old | mask; - - return (old & mask) != 0; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - */ -static inline int test_and_clear_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old & ~mask; - - return (old & mask) != 0; -} - -/** - * bitmap_alloc - Allocate bitmap + * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits */ -static inline unsigned long *bitmap_alloc(int nbits) +static inline unsigned long *bitmap_zalloc(int nbits) { return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); } @@ -141,7 +102,7 @@ static inline void bitmap_free(unsigned long *bitmap) * @buf: buffer to store output * @size: size of @buf */ -size_t bitmap_scnprintf(unsigned long *bitmap, int nbits, +size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits, char *buf, size_t size); /** @@ -151,7 +112,7 @@ size_t bitmap_scnprintf(unsigned long *bitmap, int nbits, * @src2: operand 2 * @nbits: size of bitmap */ -static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, +static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) @@ -167,8 +128,8 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) -static inline int bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static inline bool bitmap_equal(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -178,4 +139,14 @@ static inline int bitmap_equal(const unsigned long *src1, return __bitmap_equal(src1, src2, nbits); } -#endif /* _PERF_BITOPS_H */ +static inline bool bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, + unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + else + return __bitmap_intersects(src1, src2, nbits); +} + +#endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 5fca38fe1ba8..f18683b95ea6 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -26,6 +26,22 @@ extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); /* + * Defined here because those may be needed by architecture-specific static + * inlines. + */ + +#define bitop(op, nr, addr) \ + op(nr, addr) + +#define __set_bit(nr, addr) bitop(___set_bit, nr, addr) +#define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) +#define __change_bit(nr, addr) bitop(___change_bit, nr, addr) +#define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) +#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) +#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) +#define test_bit(nr, addr) bitop(_test_bit, nr, addr) + +/* * Include this here because some architectures need generic_ffs/fls in * scope * diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7f475d59a097..7c0cf5031abe 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -6,7 +6,6 @@ #include <vdso/bits.h> #include <asm/bitsperlong.h> -#define BIT_ULL(nr) (ULL(1) << (nr)) #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) @@ -22,7 +21,7 @@ #include <linux/build_bug.h> #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 57890b357f85..72535f00572f 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include <linux/compiler.h> /* for __PASTE */ @@ -38,7 +47,7 @@ asm( \ ____BTF_ID(symbol) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing @@ -73,7 +82,7 @@ asm( \ __BTF_ID_LIST(name, local) \ extern u32 name[]; -#define BTF_ID_LIST_GLOBAL(name) \ +#define BTF_ID_LIST_GLOBAL(name, n) \ __BTF_ID_LIST(name, globl) /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with @@ -82,6 +91,9 @@ __BTF_ID_LIST(name, globl) #define BTF_ID_LIST_SINGLE(name, prefix, typename) \ BTF_ID_LIST(name) \ BTF_ID(prefix, typename) +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \ + BTF_ID_LIST_GLOBAL(name, 1) \ + BTF_ID(prefix, typename) /* * The BTF_ID_UNUSED macro defines 4 zero bytes. @@ -143,13 +155,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ @@ -172,7 +185,10 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket) enum { #define BTF_SOCK_TYPE(name, str) name, @@ -184,4 +200,18 @@ MAX_BTF_SOCK_TYPE, extern u32 btf_sock_ids[]; #endif +#define BTF_TRACING_TYPE_xxx \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct) + +enum { +#define BTF_TRACING_TYPE(name, type) name, +BTF_TRACING_TYPE_xxx +#undef BTF_TRACING_TYPE +MAX_BTF_TRACING_TYPE, +}; + +extern u32 btf_tracing_ids[]; + #endif diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index cc7070c7439b..b4898ff085de 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,4 +79,13 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert + +/* + * Compile time check that field has an expected offset + */ +#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \ + BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \ + "Offset of " #field " in " #type " has changed.") + + #endif /* _LINUX_BUILD_BUG_H */ diff --git a/tools/include/linux/cache.h b/tools/include/linux/cache.h new file mode 100644 index 000000000000..9e9d585f0b9d --- /dev/null +++ b/tools/include/linux/cache.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_CACHE_H +#define _TOOLS_LINUX_CACHE_H + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#endif diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index b9d4322e1e65..62e7c901ac28 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -12,13 +12,15 @@ + __GNUC_PATCHLEVEL__) #endif -#if GCC_VERSION >= 70000 && !defined(__CHECKER__) -# define __fallthrough __attribute__ ((fallthrough)) +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ #endif -#if GCC_VERSION >= 40300 +#if __has_attribute(__error__) # define __compiletime_error(message) __attribute__((error(message))) -#endif /* GCC_VERSION >= 40300 */ +#endif /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) @@ -27,18 +29,6 @@ #define __pure __attribute__((pure)) #endif #define noinline __attribute__((noinline)) -#ifdef __has_attribute -#if __has_attribute(disable_tail_calls) -#define __no_tail_call __attribute__((disable_tail_calls)) -#endif -#endif -#ifndef __no_tail_call -#if GCC_VERSION > 40201 -#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls"))) -#else -#define __no_tail_call -#endif -#endif #ifndef __packed #define __packed __attribute__((packed)) #endif @@ -50,7 +40,3 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) - -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 2b3f7353e891..7b65566f3e42 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -2,9 +2,7 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ -#ifdef __GNUC__ -#include <linux/compiler-gcc.h> -#endif +#include <linux/compiler_types.h> #ifndef __compiletime_error # define __compiletime_error(message) @@ -44,18 +42,35 @@ # define __always_inline inline __attribute__((always_inline)) #endif +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif + +#ifndef __noreturn +#define __noreturn __attribute__((__noreturn__)) +#endif + +#ifndef unreachable +#define unreachable() __builtin_unreachable() +#endif + #ifndef noinline #define noinline #endif -#ifndef __no_tail_call -#define __no_tail_call -#endif /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: @@ -107,10 +122,6 @@ # define __init #endif -#ifndef noinline -# define noinline -#endif - #include <linux/types.h> /* @@ -195,12 +206,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s }) -#ifndef __fallthrough -# define __fallthrough -#endif - /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ #define ___PASTE(a, b) a##b #define __PASTE(a, b) ___PASTE(a, b) +#ifndef OPTIMIZER_HIDE_VAR +/* Make the optimizer believe the variable can be manipulated arbitrarily. */ +#define OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "=r" (var) : "0" (var)) +#endif + #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h new file mode 100644 index 000000000000..d09f9dc172a4 --- /dev/null +++ b/tools/include/linux/compiler_types.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_COMPILER_TYPES_H +#define __LINUX_COMPILER_TYPES_H + +/* Builtins */ + +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + +#ifdef __CHECKER__ +/* context/locking */ +# define __must_hold(x) __attribute__((context(x,1,1))) +# define __acquires(x) __attribute__((context(x,0,1))) +# define __releases(x) __attribute__((context(x,1,0))) +# define __acquire(x) __context__(x,1) +# define __release(x) __context__(x,-1) +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +#else /* __CHECKER__ */ +/* context/locking */ +# define __must_hold(x) +# define __acquires(x) +# define __releases(x) +# define __acquire(x) (void)0 +# define __release(x) (void)0 +# define __cond_lock(x,c) (c) +#endif /* __CHECKER__ */ + +/* Compiler specific macros. */ +#ifdef __GNUC__ +#include <linux/compiler-gcc.h> +#endif + +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) +#endif + +#endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index b0e35eec6499..51ac441a37c3 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -7,30 +7,54 @@ #ifndef _LINUX_CORESIGHT_PMU_H #define _LINUX_CORESIGHT_PMU_H +#include <linux/bits.h> + #define CORESIGHT_ETM_PMU_NAME "cs_etm" -#define CORESIGHT_ETM_PMU_SEED 0x10 -/* ETMv3.5/PTM's ETMCR config bit */ -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 +/* + * The legacy Trace ID system based on fixed calculation from the cpu + * number. This has been replaced by drivers using a dynamic allocation + * system - but need to retain the legacy algorithm for backward comparibility + * in certain situations:- + * a) new perf running on older systems that generate the legacy mapping + * b) older tools that may not update at the same time as the kernel. + */ +#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) + +/* + * Below are the definition of bit offsets for perf option, and works as + * arbitrary values for all ETM versions. + * + * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, + * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and + * directly use below macros as config bits. + */ +#define ETM_OPT_BRANCH_BROADCAST 8 +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 +#define ETM_OPT_TS 28 +#define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ +#define ETM4_CFG_BIT_BB 3 #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 + +/* + * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. + * Used to associate a CPU with the CoreSight Trace ID. + * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. + * [59:08] - Unused (SBZ) + * [63:60] - Version + */ +#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) +#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) -static inline int coresight_get_trace_id(int cpu) -{ - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. Since - * the common convention is to have data trace IDs be I(N) + 1, - * set instruction trace IDs as a function of the CPU number. - */ - return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)); -} +#define CS_AUX_HW_ID_CURR_VERSION 0 #endif diff --git a/tools/include/linux/ctype.h b/tools/include/linux/ctype.h index 310090b4c474..29ed3fe94404 100644 --- a/tools/include/linux/ctype.h +++ b/tools/include/linux/ctype.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CTYPE_H #define _LINUX_CTYPE_H +#include <linux/compiler.h> + /* * NOTE! This ctype does not handle EOF like the standard C * library is required to. @@ -23,11 +25,6 @@ extern const unsigned char _ctype[]; #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) -static inline int __isdigit(int c) -{ - return '0' <= c && c <= '9'; -} -#define isdigit(c) __isdigit(c) #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) @@ -40,6 +37,16 @@ static inline int __isdigit(int c) #define isascii(c) (((unsigned char)(c))<=0x7f) #define toascii(c) (((unsigned char)(c))&0x7f) +#if __has_builtin(__builtin_isdigit) +#define isdigit(c) __builtin_isdigit(c) +#else +static inline int __isdigit(int c) +{ + return '0' <= c && c <= '9'; +} +#define isdigit(c) __isdigit(c) +#endif + static inline unsigned char __tolower(unsigned char c) { if (isupper(c)) diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h deleted file mode 100644 index 72d595ce764a..000000000000 --- a/tools/include/linux/debug_locks.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_ -#define _LIBLOCKDEP_DEBUG_LOCKS_H_ - -#include <stddef.h> -#include <linux/compiler.h> -#include <asm/bug.h> - -#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x) - -extern bool debug_locks; -extern bool debug_locks_silent; - -#endif diff --git a/tools/include/linux/debugfs.h b/tools/include/linux/debugfs.h new file mode 100644 index 000000000000..4ba06140b1be --- /dev/null +++ b/tools/include/linux/debugfs.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_DEBUGFS_H +#define _TOOLS_DEBUGFS_H + +#endif diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h index 25f2bb3a991d..332b983ead1e 100644 --- a/tools/include/linux/err.h +++ b/tools/include/linux/err.h @@ -20,7 +20,7 @@ * Userspace note: * The same principle works for userspace, because 'error' pointers * fall down to the unused hole far from user space, as described - * in Documentation/x86/x86_64/mm.rst for x86_64 arch: + * in Documentation/arch/x86/x86_64/mm.rst for x86_64 arch: * * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h index d07e586b9ba0..acb6f4daa2f0 100644 --- a/tools/include/linux/export.h +++ b/tools/include/linux/export.h @@ -3,8 +3,5 @@ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) #endif diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index ca28b6ab8db7..736bdeccdfe4 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -169,15 +169,31 @@ .off = OFF, \ .imm = 0 }) -/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ +/* + * Atomic operations: + * + * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_AND *(uint *) (dst_reg + off16) &= src_reg + * BPF_OR *(uint *) (dst_reg + off16) |= src_reg + * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg + * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); + * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); + * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); + * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) + * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + */ -#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ +#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ - .imm = 0 }) + .imm = OP }) + +/* Legacy alias */ +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ diff --git a/tools/include/linux/find.h b/tools/include/linux/find.h new file mode 100644 index 000000000000..38c0a542b0e2 --- /dev/null +++ b/tools/include/linux/find.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_FIND_H_ +#define _TOOLS_LINUX_FIND_H_ + +#ifndef _TOOLS_LINUX_BITMAP_H +#error tools: only <linux/bitmap.h> can be included directly +#endif + +#include <linux/bitops.h> + +unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits, + unsigned long start); +unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); +unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, + unsigned long start); +extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size); +extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); + +#ifndef find_next_bit +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, size, offset); +} +#endif + +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & *addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_and_bit(addr1, addr2, size, offset); +} +#endif + +#ifndef find_next_zero_bit +/** + * find_next_zero_bit - find the next cleared bit in a memory region + * @addr: The address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number of the next zero bit + * If no bits are zero, returns @size. + */ +static inline +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_zero_bit(addr, size, offset); +} +#endif + +#ifndef find_first_bit +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first set bit. + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_bit(addr, size); +} +#endif + +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_and_bit(addr1, addr2, size); +} +#endif + +#ifndef find_first_zero_bit +/** + * find_first_zero_bit - find the first cleared bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first cleared bit. + * If no bits are zero, returns @size. + */ +static inline +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit(addr, size); +} +#endif + +#endif /*__LINUX_FIND_H_ */ diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h index 22030756fbc0..6a10ff5f5be9 100644 --- a/tools/include/linux/gfp.h +++ b/tools/include/linux/gfp.h @@ -1,4 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TOOLS_INCLUDE_LINUX_GFP_H #define _TOOLS_INCLUDE_LINUX_GFP_H +#include <linux/types.h> +#include <linux/gfp_types.h> + +static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) +{ + return !!(gfp_flags & __GFP_DIRECT_RECLAIM); +} + #endif /* _TOOLS_INCLUDE_LINUX_GFP_H */ diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h new file mode 100644 index 000000000000..5f9f1ed190a0 --- /dev/null +++ b/tools/include/linux/gfp_types.h @@ -0,0 +1 @@ +#include "../../../include/linux/gfp_types.h" diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h deleted file mode 100644 index b25580b6a9be..000000000000 --- a/tools/include/linux/hardirq.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_ -#define _LIBLOCKDEP_LINUX_HARDIRQ_H_ - -#define SOFTIRQ_BITS 0UL -#define HARDIRQ_BITS 0UL -#define SOFTIRQ_SHIFT 0UL -#define HARDIRQ_SHIFT 0UL -#define hardirq_count() 0UL -#define softirq_count() 0UL - -#endif diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h index ad6fa21d977b..38edaa08f862 100644 --- a/tools/include/linux/hash.h +++ b/tools/include/linux/hash.h @@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val) return val * GOLDEN_RATIO_32; } -#ifndef HAVE_ARCH_HASH_32 -#define hash_32 hash_32_generic -#endif -static inline u32 hash_32_generic(u32 val, unsigned int bits) +static inline u32 hash_32(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h new file mode 100644 index 000000000000..aaa8a0767aa3 --- /dev/null +++ b/tools/include/linux/interval_tree_generic.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + Interval Trees + (C) 2012 Michel Lespinasse <walken@google.com> + + + include/linux/interval_tree_generic.h +*/ + +#include <linux/rbtree_augmented.h> + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + * + * Note - before using this, please consider if generic version + * (interval_tree.h) would work for you... + */ + +#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ + \ +/* Callbacks for augmented rbtree insert and remove */ \ + \ +RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment, \ + ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST) \ + \ +/* Insert / remove interval nodes from the tree */ \ + \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ +{ \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ + ITTYPE start = ITSTART(node), last = ITLAST(node); \ + ITSTRUCT *parent; \ + bool leftmost = true; \ + \ + while (*link) { \ + rb_parent = *link; \ + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ + if (parent->ITSUBTREE < last) \ + parent->ITSUBTREE = last; \ + if (start < ITSTART(parent)) \ + link = &parent->ITRB.rb_left; \ + else { \ + link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ + } \ + \ + node->ITSUBTREE = last; \ + rb_link_node(&node->ITRB, rb_parent, link); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ +} \ + \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ +{ \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ +} \ + \ +/* \ + * Iterate over intervals intersecting [start;last] \ + * \ + * Note that a node's interval intersects [start;last] iff: \ + * Cond1: ITSTART(node) <= last \ + * and \ + * Cond2: start <= ITLAST(node) \ + */ \ + \ +static ITSTRUCT * \ +ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + while (true) { \ + /* \ + * Loop invariant: start <= node->ITSUBTREE \ + * (Cond2 is satisfied by one of the subtree nodes) \ + */ \ + if (node->ITRB.rb_left) { \ + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ + ITSTRUCT, ITRB); \ + if (start <= left->ITSUBTREE) { \ + /* \ + * Some nodes in left subtree satisfy Cond2. \ + * Iterate to find the leftmost such node N. \ + * If it also satisfies Cond1, that's the \ + * match we are looking for. Otherwise, there \ + * is no matching interval as nodes to the \ + * right of N can't satisfy Cond1 either. \ + */ \ + node = left; \ + continue; \ + } \ + } \ + if (ITSTART(node) <= last) { /* Cond1 */ \ + if (start <= ITLAST(node)) /* Cond2 */ \ + return node; /* node is leftmost match */ \ + if (node->ITRB.rb_right) { \ + node = rb_entry(node->ITRB.rb_right, \ + ITSTRUCT, ITRB); \ + if (start <= node->ITSUBTREE) \ + continue; \ + } \ + } \ + return NULL; /* No match */ \ + } \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ +{ \ + ITSTRUCT *node, *leftmost; \ + \ + if (!root->rb_root.rb_node) \ + return NULL; \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). \ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ + if (node->ITSUBTREE < start) \ + return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ + return ITPREFIX ## _subtree_search(node, start, last); \ +} \ + \ +ITSTATIC ITSTRUCT * \ +ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ +{ \ + struct rb_node *rb = node->ITRB.rb_right, *prev; \ + \ + while (true) { \ + /* \ + * Loop invariants: \ + * Cond1: ITSTART(node) <= last \ + * rb == node->ITRB.rb_right \ + * \ + * First, search right subtree if suitable \ + */ \ + if (rb) { \ + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ + if (start <= right->ITSUBTREE) \ + return ITPREFIX ## _subtree_search(right, \ + start, last); \ + } \ + \ + /* Move up the tree until we come from a node's left child */ \ + do { \ + rb = rb_parent(&node->ITRB); \ + if (!rb) \ + return NULL; \ + prev = &node->ITRB; \ + node = rb_entry(rb, ITSTRUCT, ITRB); \ + rb = node->ITRB.rb_right; \ + } while (prev == rb); \ + \ + /* Check if the node intersects [start;last] */ \ + if (last < ITSTART(node)) /* !Cond1 */ \ + return NULL; \ + else if (start <= ITLAST(node)) /* Cond2 */ \ + return node; \ + } \ +} diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h new file mode 100644 index 000000000000..e129871fe661 --- /dev/null +++ b/tools/include/linux/io.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_IO_H +#define _TOOLS_IO_H + +#endif diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h deleted file mode 100644 index 501262aee8ff..000000000000 --- a/tools/include/linux/irqflags.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ - -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS - -# define stop_critical_timings() do { } while (0) -# define start_critical_timings() do { } while (0) - -#define raw_local_irq_disable() do { } while (0) -#define raw_local_irq_enable() do { } while (0) -#define raw_local_irq_save(flags) ((flags) = 0) -#define raw_local_irq_restore(flags) ((void)(flags)) -#define raw_local_save_flags(flags) ((flags) = 0) -#define raw_irqs_disabled_flags(flags) ((void)(flags)) -#define raw_irqs_disabled() 0 -#define raw_safe_halt() - -#define local_irq_enable() do { } while (0) -#define local_irq_disable() do { } while (0) -#define local_irq_save(flags) ((flags) = 0) -#define local_irq_restore(flags) ((void)(flags)) -#define local_save_flags(flags) ((flags) = 0) -#define irqs_disabled() (1) -#define irqs_disabled_flags(flags) ((void)(flags), 0) -#define safe_halt() do { } while (0) - -#define trace_lock_release(x, y) -#define trace_lock_acquire(a, b, c, d, e, f, g) - -#endif diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index efb6c3f5f2a9..5a37ccbec54f 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -6,7 +6,7 @@ #include <stdio.h> #include <unistd.h> -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct module; diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h new file mode 100644 index 000000000000..13b86bd3b746 --- /dev/null +++ b/tools/include/linux/kconfig.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_KCONFIG_H +#define _TOOLS_LINUX_KCONFIG_H + +/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ + +#define __ARG_PLACEHOLDER_1 0, +#define __take_second_arg(__ignored, val, ...) val + +/* + * The use of "&&" / "||" is limited in certain expressions. + * The following enable to calculate "and" / "or" with macro expansion only. + */ +#define __and(x, y) ___and(x, y) +#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y) +#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0) + +#define __or(x, y) ___or(x, y) +#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y) +#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y) + +/* + * Helper macros to use CONFIG_ options in C/CPP expressions. Note that + * these only work with boolean and tristate options. + */ + +/* + * Getting something that works in C and CPP for an arg that may or may + * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1" + * we match on the placeholder define, insert the "0," for arg1 and generate + * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one). + * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when + * the last step cherry picks the 2nd arg, we get a zero. + */ +#define __is_defined(x) ___is_defined(x) +#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) +#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) + +/* + * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 + * otherwise. For boolean options, this is equivalent to + * IS_ENABLED(CONFIG_FOO). + */ +#define IS_BUILTIN(option) __is_defined(option) + +/* + * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 + * otherwise. + */ +#define IS_MODULE(option) __is_defined(option##_MODULE) + +/* + * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled + * code can call a function defined in code compiled based on CONFIG_FOO. + * This is similar to IS_ENABLED(), but returns false when invoked from + * built-in code when CONFIG_FOO is set to 'm'. + */ +#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \ + __and(IS_MODULE(option), __is_defined(MODULE))) + +/* + * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', + * 0 otherwise. + */ +#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) + +#endif /* _TOOLS_LINUX_KCONFIG_H */ diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index a7e54a08fb54..4b0673bf52c2 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -7,6 +7,7 @@ #include <assert.h> #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/math.h> #include <endian.h> #include <byteswap.h> @@ -14,7 +15,7 @@ #define UINT_MAX (~0U) #endif -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define _RET_IP_ ((unsigned long)__builtin_return_address(0)) #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) @@ -52,14 +53,9 @@ _min1 < _min2 ? _min1 : _min2; }) #endif -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif +#define max_t(type, x, y) max((type)x, (type)y) +#define min_t(type, x, y) min((type)x, (type)y) +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) #ifndef BUG_ON #ifdef NDEBUG @@ -102,17 +98,9 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); +#ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) +#endif #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/linux/list.h b/tools/include/linux/list.h index b2fc48d5478c..a4dfb6a7cc6a 100644 --- a/tools/include/linux/list.h +++ b/tools/include/linux/list.h @@ -385,6 +385,17 @@ static inline void list_splice_tail_init(struct list_head *list, (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) /** + * list_last_entry_or_null - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_last_entry_or_null(ptr, type, member) \ + (!list_empty(ptr) ? list_last_entry(ptr, type, member) : NULL) + +/** * list_next_entry - get the next element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. diff --git a/tools/include/linux/list_sort.h b/tools/include/linux/list_sort.h new file mode 100644 index 000000000000..453105f74e05 --- /dev/null +++ b/tools/include/linux/list_sort.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include <linux/types.h> + +struct list_head; + +typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, + const struct list_head *, const struct list_head *); + +__attribute__((nonnull(2,3))) +void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp); +#endif diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h deleted file mode 100644 index e56997288f2b..000000000000 --- a/tools/include/linux/lockdep.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LOCKDEP_H_ -#define _LIBLOCKDEP_LOCKDEP_H_ - -#include <sys/prctl.h> -#include <sys/syscall.h> -#include <string.h> -#include <limits.h> -#include <linux/utsname.h> -#include <linux/compiler.h> -#include <linux/export.h> -#include <linux/kern_levels.h> -#include <linux/err.h> -#include <linux/rcu.h> -#include <linux/list.h> -#include <linux/hardirq.h> -#include <unistd.h> - -#define MAX_LOCK_DEPTH 63UL - -#define asmlinkage -#define __visible - -#include "../../../include/linux/lockdep.h" - -struct task_struct { - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; - int pid; - int state; - char comm[17]; -}; - -#define TASK_RUNNING 0 - -extern struct task_struct *__curr(void); - -#define current (__curr()) - -static inline int debug_locks_off(void) -{ - return 1; -} - -#define task_pid_nr(tsk) ((tsk)->pid) - -#define KSYM_NAME_LEN 128 -#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) -#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define pr_warn pr_err -#define pr_cont pr_err - -#define list_del_rcu list_del - -#define atomic_t unsigned long -#define atomic_inc(x) ((*(x))++) - -#define print_tainted() "" -#define static_obj(x) 1 - -#define debug_show_all_locks() -extern void debug_check_no_locks_held(void); - -static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr) -{ - return false; -} - -#endif diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h new file mode 100644 index 000000000000..4e7af99ec9eb --- /dev/null +++ b/tools/include/linux/math.h @@ -0,0 +1,25 @@ +#ifndef _TOOLS_MATH_H +#define _TOOLS_MATH_H + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#endif diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h new file mode 100644 index 000000000000..4ad45d5943dc --- /dev/null +++ b/tools/include/linux/math64.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MATH64_H +#define _LINUX_MATH64_H + +#include <linux/types.h> + +#ifdef __x86_64__ +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + u64 q; + + asm ("mulq %2; divq %3" : "=a" (q) + : "a" (a), "rm" (b), "rm" (c) + : "rdx"); + + return q; +} +#define mul_u64_u64_div64 mul_u64_u64_div64 +#endif + +#ifdef __SIZEOF_INT128__ +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * b) >> shift); +} + +#else + +#ifdef __i386__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#else +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) +{ + u32 ah, al; + u64 ret; + + al = a; + ah = a >> 32; + + ret = mul_u32_u32(al, b) >> shift; + if (ah) + ret += mul_u32_u32(ah, b) << (32 - shift); + + return ret; +} + +#endif /* __SIZEOF_INT128__ */ + +#ifndef mul_u64_u64_div64 +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + u64 quot, rem; + + quot = a / c; + rem = a % c; + + return quot * b + (rem * b) / c; +} +#endif + +#endif /* _LINUX_MATH64_H */ diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h new file mode 100644 index 000000000000..f3c82ab5b14c --- /dev/null +++ b/tools/include/linux/mm.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_MM_H +#define _TOOLS_LINUX_MM_H + +#include <linux/mmzone.h> +#include <uapi/linux/const.h> + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define PHYS_ADDR_MAX (~(phys_addr_t)0) + +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) + +#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) + +#define __va(x) ((void *)((unsigned long)(x))) +#define __pa(x) ((unsigned long)(x)) + +#define pfn_to_page(pfn) ((void *)((pfn) * PAGE_SIZE)) + +#define phys_to_virt phys_to_virt +static inline void *phys_to_virt(unsigned long address) +{ + return __va(address); +} + +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); + +static inline void totalram_pages_inc(void) +{ +} + +static inline void totalram_pages_add(long count) +{ +} + +#endif diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h deleted file mode 100644 index 577f51436cf9..000000000000 --- a/tools/include/linux/objtool.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_OBJTOOL_H -#define _LINUX_OBJTOOL_H - -#ifndef __ASSEMBLY__ - -#include <linux/types.h> - -/* - * This struct is used by asm and inline asm code to manually annotate the - * location of registers on the stack. - */ -struct unwind_hint { - u32 ip; - s16 sp_offset; - u8 sp_reg; - u8 type; - u8 end; -}; -#endif - -/* - * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP - * (the caller's SP right before it made the call). Used for all callable - * functions, i.e. all C code and all callable asm functions. - * - * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset - * points to a fully populated pt_regs from a syscall, interrupt, or exception. - * - * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that - * sp_reg+sp_offset points to the iret return frame. - */ -#define UNWIND_HINT_TYPE_CALL 0 -#define UNWIND_HINT_TYPE_REGS 1 -#define UNWIND_HINT_TYPE_REGS_PARTIAL 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 - -#ifdef CONFIG_STACK_VALIDATION - -#ifndef __ASSEMBLY__ - -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "987: \n\t" \ - ".pushsection .discard.unwind_hints\n\t" \ - /* struct unwind_hint */ \ - ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ - ".byte " __stringify(sp_reg) "\n\t" \ - ".byte " __stringify(type) "\n\t" \ - ".byte " __stringify(end) "\n\t" \ - ".balign 4 \n\t" \ - ".popsection\n\t" - -/* - * This macro marks the given function's stack frame as "non-standard", which - * tells objtool to ignore the function when doing stack metadata validation. - * It should only be used in special cases where you're 100% sure it won't - * affect the reliability of frame pointers and kernel stack traces. - * - * For more information, see tools/objtool/Documentation/stack-validation.txt. - */ -#define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(".discard.func_stack_frame_non_standard") \ - *__func_stack_frame_non_standard_##func = func - -#else /* __ASSEMBLY__ */ - -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL \ - 999: \ - .pushsection .discard.intra_function_calls; \ - .long 999b; \ - .popsection; - -/* - * In asm, there are two kinds of code: normal C-type callable functions and - * the rest. The normal callable functions can be called by other code, and - * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this - * category. In this case, no special debugging annotations are needed because - * objtool can automatically generate the ORC data for the ORC unwinder to read - * at runtime. - * - * Anything which doesn't fall into the above category, such as syscall and - * interrupt handlers, tends to not be called directly by other functions, and - * often does unusual non-C-function-type things with the stack pointer. Such - * code needs to be annotated such that objtool can understand it. The - * following CFI hint macros are for this type of code. - * - * These macros provide hints to objtool about the state of the stack at each - * instruction. Objtool starts from the hints and follows the code flow, - * making automatic CFI adjustments when it sees pushes and pops, filling out - * the debuginfo as necessary. It will also warn if it sees any - * inconsistencies. - */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 -.Lunwind_hint_ip_\@: - .pushsection .discard.unwind_hints - /* struct unwind_hint */ - .long .Lunwind_hint_ip_\@ - . - .short \sp_offset - .byte \sp_reg - .byte \type - .byte \end - .balign 4 - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#else /* !CONFIG_STACK_VALIDATION */ - -#ifndef __ASSEMBLY__ - -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "\n\t" -#define STACK_FRAME_NON_STANDARD(func) -#else -#define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 -.endm -#endif - -#endif /* CONFIG_STACK_VALIDATION */ - -#endif /* _LINUX_OBJTOOL_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h new file mode 100644 index 000000000000..453a4f4ef39d --- /dev/null +++ b/tools/include/linux/objtool_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_OBJTOOL_TYPES_H +#define _LINUX_OBJTOOL_TYPES_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +/* + * This struct is used by asm and inline asm code to manually annotate the + * location of registers on the stack. + */ +struct unwind_hint { + u32 ip; + s16 sp_offset; + u8 sp_reg; + u8 type; + u8 signal; +}; + +#endif /* __ASSEMBLY__ */ + +/* + * UNWIND_HINT_TYPE_UNDEFINED: A blind spot in ORC coverage which can result in + * a truncated and unreliable stack unwind. + * + * UNWIND_HINT_TYPE_END_OF_STACK: The end of the kernel stack unwind before + * hitting user entry, boot code, or fork entry (when there are no pt_regs + * available). + * + * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP + * (the caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset + * points to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. + * + * UNWIND_HINT_TYPE_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_TYPE_{SAVE,RESTORE}: Save the unwind metadata at a certain + * location so that it can be restored later. + */ +#define UNWIND_HINT_TYPE_UNDEFINED 0 +#define UNWIND_HINT_TYPE_END_OF_STACK 1 +#define UNWIND_HINT_TYPE_CALL 2 +#define UNWIND_HINT_TYPE_REGS 3 +#define UNWIND_HINT_TYPE_REGS_PARTIAL 4 +/* The below hint types don't have corresponding ORC types */ +#define UNWIND_HINT_TYPE_FUNC 5 +#define UNWIND_HINT_TYPE_SAVE 6 +#define UNWIND_HINT_TYPE_RESTORE 7 + +#endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index 8712ff70995f..dcb0c1bf6866 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h @@ -5,12 +5,9 @@ #include <linux/compiler.h> /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -36,8 +33,6 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) - -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -73,135 +68,6 @@ __builtin_mul_overflow(__a, __b, __d); \ }) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. - */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d)) - -#define check_sub_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d)) - -#define check_mul_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d)) - - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** * array_size() - Calculate size of 2-dimensional array. * diff --git a/tools/include/linux/pfn.h b/tools/include/linux/pfn.h new file mode 100644 index 000000000000..7512a58189eb --- /dev/null +++ b/tools/include/linux/pfn.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_PFN_H_ +#define _TOOLS_LINUX_PFN_H_ + +#include <linux/mm.h> + +#define PFN_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) +#endif diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index d29725769107..2e6338ac5eed 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -35,12 +35,8 @@ */ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) -/********** mm/debug-pagealloc.c **********/ -#ifdef CONFIG_PAGE_POISONING_ZERO -#define PAGE_POISON 0x00 -#else +/********** mm/page_poison.c **********/ #define PAGE_POISON 0xaa -#endif /********** mm/page_alloc.c ************/ diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h deleted file mode 100644 index 8b3b03b64fda..000000000000 --- a/tools/include/linux/proc_fs.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H -#define _TOOLS_INCLUDE_LINUX_PROC_FS_H - -#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */ diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h index 30dd21f976c3..2680f2edb837 100644 --- a/tools/include/linux/rbtree.h +++ b/tools/include/linux/rbtree.h @@ -152,4 +152,194 @@ static inline void rb_replace_node_cached(struct rb_node *victim, rb_replace_node(victim, new, &root->rb_root); } -#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ +/* + * The below helper functions use 2 operators with 3 different + * calling conventions. The operators are related like: + * + * comp(a->key,b) < 0 := less(a,b) + * comp(a->key,b) > 0 := less(b,a) + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) + * + * If these operators define a partial order on the elements we make no + * guarantee on which of the elements matching the key is found. See + * rb_find(). + * + * The reason for this is to allow the find() interface without requiring an + * on-stack dummy object, which might not be feasible due to object size. + */ + +/** + * rb_add_cached() - insert @node into the leftmost cached tree @tree + * @node: node to insert + * @tree: leftmost cached tree to insert @node into + * @less: operator defining the (partial) node order + */ +static __always_inline void +rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + bool leftmost = true; + + while (*link) { + parent = *link; + if (less(node, parent)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); +} + +/** + * rb_add() - insert @node into @tree + * @node: node to insert + * @tree: tree to insert @node into + * @less: operator defining the (partial) node order + */ +static __always_inline void +rb_add(struct rb_node *node, struct rb_root *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + + while (*link) { + parent = *link; + if (less(node, parent)) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node(node, parent, link); + rb_insert_color(node, tree); +} + +/** + * rb_find_add() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) + link = &parent->rb_left; + else if (c > 0) + link = &parent->rb_right; + else + return parent; + } + + rb_link_node(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + +/** + * rb_find() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Returns the rb_node matching @key or NULL. + */ +static __always_inline struct rb_node * +rb_find(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + + while (node) { + int c = cmp(key, node); + + if (c < 0) + node = node->rb_left; + else if (c > 0) + node = node->rb_right; + else + return node; + } + + return NULL; +} + +/** + * rb_find_first() - find the first @key in @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining node order + * + * Returns the leftmost node matching @key, or NULL. + */ +static __always_inline struct rb_node * +rb_find_first(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + struct rb_node *match = NULL; + + while (node) { + int c = cmp(key, node); + + if (c <= 0) { + if (!c) + match = node; + node = node->rb_left; + } else if (c > 0) { + node = node->rb_right; + } + } + + return match; +} + +/** + * rb_next_match() - find the next @key in @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining node order + * + * Returns the next node matching @key, or NULL. + */ +static __always_inline struct rb_node * +rb_next_match(const void *key, struct rb_node *node, + int (*cmp)(const void *key, const struct rb_node *)) +{ + node = rb_next(node); + if (node && cmp(key, node)) + node = NULL; + return node; +} + +/** + * rb_for_each() - iterates a subtree matching @key + * @node: iterator + * @key: key to match + * @tree: tree to search + * @cmp: operator defining node order + */ +#define rb_for_each(node, key, tree, cmp) \ + for ((node) = rb_find_first((key), (tree), (cmp)); \ + (node); (node) = rb_next_match((key), (node), (cmp))) + +#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h new file mode 100644 index 000000000000..f8bffd4a987c --- /dev/null +++ b/tools/include/linux/rwsem.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _TOOLS__RWSEM_H +#define _TOOLS__RWSEM_H + +#include <pthread.h> + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +static inline int init_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_init(&sem->lock, NULL); +} + +static inline int exit_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_destroy(&sem->lock); +} + +static inline int down_read(struct rw_semaphore *sem) +{ + return pthread_rwlock_rdlock(&sem->lock); +} + +static inline int up_read(struct rw_semaphore *sem) +{ + return pthread_rwlock_unlock(&sem->lock); +} + +static inline int down_write(struct rw_semaphore *sem) +{ + return pthread_rwlock_wrlock(&sem->lock); +} + +static inline int up_write(struct rw_semaphore *sem) +{ + return pthread_rwlock_unlock(&sem->lock); +} + +#define down_read_nested(sem, subclass) down_read(sem) +#define down_write_nested(sem, subclass) down_write(sem) + +#endif /* _TOOLS_RWSEM_H */ diff --git a/tools/include/linux/sched/mm.h b/tools/include/linux/sched/mm.h index c8d9f19c1f35..967294b8edcf 100644 --- a/tools/include/linux/sched/mm.h +++ b/tools/include/linux/sched/mm.h @@ -1,4 +1,6 @@ #ifndef _TOOLS_PERF_LINUX_SCHED_MM_H #define _TOOLS_PERF_LINUX_SCHED_MM_H +#define might_alloc(gfp) do { } while (0) + #endif /* _TOOLS_PERF_LINUX_SCHED_MM_H */ diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h index 102fd9217f1f..f6bc226af0c1 100644 --- a/tools/include/linux/seq_file.h +++ b/tools/include/linux/seq_file.h @@ -1,4 +1,6 @@ #ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H #define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H +struct seq_file; + #endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */ diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h new file mode 100644 index 000000000000..51b25e9c4ec7 --- /dev/null +++ b/tools/include/linux/slab.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_SLAB_H +#define _TOOLS_SLAB_H + +#include <linux/types.h> +#include <linux/gfp.h> + +#define SLAB_PANIC 2 +#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ + +#define kzalloc_node(size, flags, node) kmalloc(size, flags) + +void *kmalloc(size_t size, gfp_t gfp); +void kfree(void *p); + +bool slab_is_available(void); + +enum slab_state { + DOWN, + PARTIAL, + UP, + FULL +}; + +static inline void *kzalloc(size_t size, gfp_t gfp) +{ + return kmalloc(size, gfp | __GFP_ZERO); +} + +struct list_lru; + +void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *, int flags); +static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) +{ + return kmem_cache_alloc_lru(cachep, NULL, flags); +} +void kmem_cache_free(struct kmem_cache *cachep, void *objp); + +struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, + unsigned int align, unsigned int flags, + void (*ctor)(void *)); + +void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); +int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, + void **list); + +#endif /* _TOOLS_SLAB_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index c934572d935c..a6cdf25b6b9d 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -11,6 +11,7 @@ #define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock(x) pthread_mutex_lock(x) +#define spin_lock_nested(x, subclass) pthread_mutex_lock(x) #define spin_unlock(x) pthread_mutex_unlock(x) #define spin_lock_bh(x) pthread_mutex_lock(x) #define spin_unlock_bh(x) pthread_mutex_unlock(x) @@ -37,6 +38,4 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } -#include <linux/lockdep.h> - #endif diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h deleted file mode 100644 index ae343ac35bfa..000000000000 --- a/tools/include/linux/stacktrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_ -#define _LIBLOCKDEP_LINUX_STACKTRACE_H_ - -#include <execinfo.h> - -struct stack_trace { - unsigned int nr_entries, max_entries; - unsigned long *entries; - int skip; -}; - -static inline void print_stack_trace(struct stack_trace *trace, int spaces) -{ - backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1); -} - -#define save_stack_trace(trace) \ - ((trace)->nr_entries = \ - backtrace((void **)(trace)->entries, (trace)->max_entries)) - -static inline int dump_stack(void) -{ - void *array[64]; - size_t size; - - size = backtrace(array, 64); - backtrace_symbols_fd(array, size, 1); - - return 0; -} - -#endif diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 89135bb35bf7..5a00b8b2cf9f 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -4,11 +4,13 @@ #include <linux/types.h> #include <linux/stringify.h> +#include <linux/compiler.h> #define STATIC_CALL_KEY_PREFIX __SCK__ #define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) #define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) +#define STATIC_CALL_KEY_STR(name) __stringify(STATIC_CALL_KEY(name)) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) @@ -32,4 +34,70 @@ struct static_call_site { s32 key; }; +#define DECLARE_STATIC_CALL(name, func) \ + extern struct static_call_key STATIC_CALL_KEY(name); \ + extern typeof(func) STATIC_CALL_TRAMP(name); + +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name)) + +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +/* + * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from + * the symbol table so that objtool can reference it when it generates the + * .static_call_sites section. + */ +#define __STATIC_CALL_ADDRESSABLE(name) \ + __ADDRESSABLE(STATIC_CALL_KEY(name)) + +#define __static_call(name) \ +({ \ + __STATIC_CALL_ADDRESSABLE(name); \ + __raw_static_call(name); \ +}) + +struct static_call_key { + void *func; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; +}; + +#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ + +#define __STATIC_CALL_ADDRESSABLE(name) +#define __static_call(name) __raw_static_call(name) + +struct static_call_key { + void *func; +}; + +#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ + +#ifdef MODULE +#define __STATIC_CALL_MOD_ADDRESSABLE(name) +#define static_call_mod(name) __raw_static_call(name) +#else +#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name) +#define static_call_mod(name) __static_call(name) +#endif + +#define static_call(name) __static_call(name) + +#else + +struct static_call_key { + void *func; +}; + +#define static_call(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +#endif /* CONFIG_HAVE_STATIC_CALL */ + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 5e9e781905ed..db5c99318c79 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -46,4 +46,5 @@ extern char * __must_check skip_spaces(const char *); extern char *strim(char *); +extern void *memchr_inv(const void *start, int c, size_t bytes); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 154eb4e3ca7c..8519386acd23 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -6,7 +6,10 @@ #include <stddef.h> #include <stdint.h> +#ifndef __SANE_USERSPACE_TYPES__ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#endif + #include <asm/types.h> #include <asm/posix_types.h> @@ -40,14 +43,18 @@ typedef __u8 u8; typedef __s8 s8; #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ #define __force +/* This is defined in linux/compiler_types.h and is left for backward + * compatibility. + */ +#ifndef __user #define __user +#endif #define __must_check #define __cold @@ -58,10 +65,23 @@ typedef __u32 __bitwise __be32; typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#ifdef CONFIG_PHYS_ADDR_T_64BIT +typedef u64 phys_addr_t; +#else +typedef u32 phys_addr_t; +#endif + typedef struct { int counter; } atomic_t; +typedef struct { + long counter; +} atomic_long_t; + #ifndef __aligned_u64 # define __aligned_u64 __u64 __attribute__((aligned(8))) #endif diff --git a/tools/include/nolibc/.gitignore b/tools/include/nolibc/.gitignore new file mode 100644 index 000000000000..dea22eaaed2b --- /dev/null +++ b/tools/include/nolibc/.gitignore @@ -0,0 +1 @@ +sysroot diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile new file mode 100644 index 000000000000..e69c26abe1ea --- /dev/null +++ b/tools/include/nolibc/Makefile @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for nolibc installation and tests +include ../../scripts/Makefile.include + +# we're in ".../tools/include/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR))) +endif + +# when run as make -C tools/ nolibc_<foo> the arch is not set +ifeq ($(ARCH),) +include $(srctree)/scripts/subarch.include +ARCH = $(SUBARCH) +endif + +# OUTPUT is only set when run from the main makefile, otherwise +# it defaults to this nolibc directory. +OUTPUT ?= $(CURDIR)/ + +ifeq ($(V),1) +Q= +else +Q=@ +endif + +nolibc_arch := $(patsubst arm64,aarch64,$(ARCH)) +arch_file := arch-$(nolibc_arch).h +all_files := \ + compiler.h \ + crt.h \ + ctype.h \ + errno.h \ + nolibc.h \ + signal.h \ + stackprotector.h \ + std.h \ + stdarg.h \ + stdint.h \ + stdlib.h \ + string.h \ + sys.h \ + time.h \ + types.h \ + unistd.h \ + stdio.h \ + + +# install all headers needed to support a bare-metal compiler +all: headers + +install: help + +help: + @echo "Supported targets under nolibc:" + @echo " all call \"headers\"" + @echo " clean clean the sysroot" + @echo " headers prepare a sysroot in tools/include/nolibc/sysroot" + @echo " headers_standalone like \"headers\", and also install kernel headers" + @echo " help this help" + @echo "" + @echo "These targets may also be called from tools as \"make nolibc_<target>\"." + @echo "" + @echo "Currently using the following variables:" + @echo " ARCH = $(ARCH)" + @echo " OUTPUT = $(OUTPUT)" + @echo "" + +# Note: when ARCH is "x86" we concatenate both x86_64 and i386 +headers: + $(Q)mkdir -p $(OUTPUT)sysroot + $(Q)mkdir -p $(OUTPUT)sysroot/include + $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ + $(Q)if [ "$(ARCH)" = "x86" ]; then \ + sed -e \ + 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ + arch-x86_64.h; \ + sed -e \ + 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \ + arch-i386.h; \ + elif [ -e "$(arch_file)" ]; then \ + cat $(arch_file); \ + else \ + echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \ + exit 1; \ + fi > $(OUTPUT)sysroot/include/arch.h + +headers_standalone: headers + $(Q)$(MAKE) -C $(srctree) headers + $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot + +clean: + $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot" diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h new file mode 100644 index 000000000000..b23ac1f04035 --- /dev/null +++ b/tools/include/nolibc/arch-aarch64.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * AARCH64 specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_AARCH64_H +#define _NOLIBC_ARCH_AARCH64_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for AARCH64 : + * - registers are 64-bit + * - stack is 16-byte aligned + * - syscall number is passed in x8 + * - arguments are in x0, x1, x2, x3, x4, x5 + * - the system call is performed by calling svc 0 + * - syscall return comes in x0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0"); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + register long _arg4 __asm__ ("x3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + register long _arg4 __asm__ ("x3") = (long)(arg4); \ + register long _arg5 __asm__ ("x4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = (long)(arg3); \ + register long _arg4 __asm__ ("x3") = (long)(arg4); \ + register long _arg5 __asm__ ("x4") = (long)(arg5); \ + register long _arg6 __asm__ ("x5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ + "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ + "bl _start_c\n" /* transfer to c runtime */ + ); + __builtin_unreachable(); +} +#endif /* _NOLIBC_ARCH_AARCH64_H */ diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h new file mode 100644 index 000000000000..cae4afa7c1c7 --- /dev/null +++ b/tools/include/nolibc/arch-arm.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * ARM specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_ARM_H +#define _NOLIBC_ARCH_ARM_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for ARM in ARM or Thumb modes : + * - registers are 32-bit + * - stack is 8-byte aligned + * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) + * - syscall number is passed in r7 + * - arguments are in r0, r1, r2, r3, r4, r5 + * - the system call is performed by calling svc #0 + * - syscall return comes in r0. + * - only lr is clobbered. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * - in thumb mode without -fomit-frame-pointer, r7 is also used to store the + * frame pointer, and we cannot directly assign it as a register variable, + * nor can we clobber it. Instead we assign the r6 register and swap it + * with r7 before calling svc, and r6 is marked as clobbered. + * We're just using any regular register which we assign to r7 after saving + * it. + * + * Also, ARM supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#if (defined(__THUMBEB__) || defined(__THUMBEL__)) && \ + !defined(NOLIBC_OMIT_FRAME_POINTER) +/* swap r6,r7 needed in Thumb mode since we can't use nor clobber r7 */ +#define _NOLIBC_SYSCALL_REG "r6" +#define _NOLIBC_THUMB_SET_R7 "eor r7, r6\neor r6, r7\neor r7, r6\n" +#define _NOLIBC_THUMB_RESTORE_R7 "mov r7, r6\n" + +#else /* we're in ARM mode */ +/* in Arm mode we can directly use r7 */ +#define _NOLIBC_SYSCALL_REG "r7" +#define _NOLIBC_THUMB_SET_R7 "" +#define _NOLIBC_THUMB_RESTORE_R7 "" + +#endif /* end THUMB */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r"(_num) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r" (_num) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r" (_num) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + register long _arg4 __asm__ ("r3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + register long _arg4 __asm__ ("r3") = (long)(arg4); \ + register long _arg5 __asm__ ("r4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ + register long _arg1 __asm__ ("r0") = (long)(arg1); \ + register long _arg2 __asm__ ("r1") = (long)(arg2); \ + register long _arg3 __asm__ ("r2") = (long)(arg3); \ + register long _arg4 __asm__ ("r3") = (long)(arg4); \ + register long _arg5 __asm__ ("r4") = (long)(arg5); \ + register long _arg6 __asm__ ("r5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_THUMB_SET_R7 \ + "svc #0\n" \ + _NOLIBC_THUMB_RESTORE_R7 \ + : "=r"(_arg1), "=r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + "mov %r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ + "and ip, %r0, #-8\n" /* sp must be 8-byte aligned in the callee */ + "mov sp, ip\n" + "bl _start_c\n" /* transfer to c runtime */ + ); + __builtin_unreachable(); +} + +#endif /* _NOLIBC_ARCH_ARM_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h new file mode 100644 index 000000000000..28c26a00a762 --- /dev/null +++ b/tools/include/nolibc/arch-i386.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * i386 specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_I386_H +#define _NOLIBC_ARCH_I386_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + register long _arg5 __asm__ ("edi") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _eax = (long)(num); \ + long _arg6 = (long)(arg6); /* Always in memory */ \ + __asm__ volatile ( \ + "pushl %[_arg6]\n\t" \ + "pushl %%ebp\n\t" \ + "movl 4(%%esp),%%ebp\n\t" \ + "int $0x80\n\t" \ + "popl %%ebp\n\t" \ + "addl $4,%%esp\n\t" \ + : "+a"(_eax) /* %eax */ \ + : "b"(arg1), /* %ebx */ \ + "c"(arg2), /* %ecx */ \ + "d"(arg3), /* %edx */ \ + "S"(arg4), /* %esi */ \ + "D"(arg5), /* %edi */ \ + [_arg6]"m"(_arg6) /* memory */ \ + : "memory", "cc" \ + ); \ + _eax; \ +}) + +/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ + "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ + "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ + "push %eax\n" /* push arg1 on stack to support plain stack modes too */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __builtin_unreachable(); +} + +#endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h new file mode 100644 index 000000000000..3f8ef8f86c0f --- /dev/null +++ b/tools/include/nolibc/arch-loongarch.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * LoongArch specific definitions for NOLIBC + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +#ifndef _NOLIBC_ARCH_LOONGARCH_H +#define _NOLIBC_ARCH_LOONGARCH_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for LoongArch : + * - stack is 16-byte aligned + * - syscall number is passed in a7 + * - arguments are in a0, a1, a2, a3, a4, a5 + * - the system call is performed by calling "syscall 0" + * - syscall return comes in a0 + * - the arguments are cast to long and assigned into the target + * registers which are then simply passed as registers to the asm code, + * so that we don't have to experience issues with register constraints. + */ + +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0"); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "+r"(_arg1) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), \ + "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 __asm__ ("a4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 __asm__ ("a4") = (long)(arg5); \ + register long _arg6 __asm__ ("a5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall 0\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg1; \ +}) + +#if __loongarch_grlen == 32 +#define LONG_BSTRINS "bstrins.w" +#else /* __loongarch_grlen == 64 */ +#define LONG_BSTRINS "bstrins.d" +#endif + +/* startup code */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ + LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ + "bl _start_c\n" /* transfer to c runtime */ + ); + __builtin_unreachable(); +} + +#endif /* _NOLIBC_ARCH_LOONGARCH_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h new file mode 100644 index 000000000000..62cc50ef3288 --- /dev/null +++ b/tools/include/nolibc/arch-mips.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * MIPS specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_MIPS_H +#define _NOLIBC_ARCH_MIPS_H + +#include "compiler.h" +#include "crt.h" + +#if !defined(_ABIO32) +#error Unsupported MIPS ABI +#endif + +/* Syscalls for MIPS ABI O32 : + * - WARNING! there's always a delayed slot! + * - WARNING again, the syntax is different, registers take a '$' and numbers + * do not. + * - registers are 32-bit + * - stack is 8-byte aligned + * - syscall number is passed in v0 (starts at 0xfa0). + * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to + * leave some room in the stack for the callee to save a0..a3 if needed. + * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are + * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as + * scall32-o32.S in the kernel sources. + * - the system call is performed by calling "syscall" + * - syscall return comes in v0, and register a3 needs to be checked to know + * if an error occurred, in which case errno is in v0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", "hi", "lo", \ + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3"); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + register long _arg6 = (long)(arg6); \ + \ + __asm__ volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "sw %8, 20($sp)\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +/* startup code, note that it's called __start on MIPS */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void) +{ + __asm__ volatile ( + ".set push\n" + ".set noreorder\n" + "bal 1f\n" /* prime $ra for .cpload */ + "nop\n" + "1:\n" + ".cpload $ra\n" + "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ + "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ + ".cprestore 0\n" + "li $t0, -8\n" + "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ + "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ + "jal _start_c\n" /* transfer to c runtime */ + " nop\n" /* delayed slot */ + ".set pop\n" + ); + __builtin_unreachable(); +} + +#endif /* _NOLIBC_ARCH_MIPS_H */ diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h new file mode 100644 index 000000000000..ac212e6185b2 --- /dev/null +++ b/tools/include/nolibc/arch-powerpc.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * PowerPC specific definitions for NOLIBC + * Copyright (C) 2023 Zhangjin Wu <falcon@tinylab.org> + */ + +#ifndef _NOLIBC_ARCH_POWERPC_H +#define _NOLIBC_ARCH_POWERPC_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for PowerPC : + * - stack is 16-byte aligned + * - syscall number is passed in r0 + * - arguments are in r3, r4, r5, r6, r7, r8, r9 + * - the system call is performed by calling "sc" + * - syscall return comes in r3, and the summary overflow bit is checked + * to know if an error occurred, in which case errno is in r3. + * - the arguments are cast to long and assigned into the target + * registers which are then simply passed as registers to the asm code, + * so that we don't have to experience issues with register constraints. + */ + +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cr0", "r12", "r11", "r10", "r9" + +#define my_syscall0(num) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num) \ + : \ + : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num) \ + : "0"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4" \ + ); \ + _ret; \ +}) + + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num), "+r"(_arg2) \ + : "0"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5" \ + ); \ + _ret; \ +}) + + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3) \ + : "0"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6" \ + ); \ + _ret; \ +}) + + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + register long _arg4 __asm__ ("r6") = (long)(arg4); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \ + "+r"(_arg4) \ + : "0"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7" \ + ); \ + _ret; \ +}) + + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + register long _arg4 __asm__ ("r6") = (long)(arg4); \ + register long _arg5 __asm__ ("r7") = (long)(arg5); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \ + "+r"(_arg4), "+r"(_arg5) \ + : "0"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST, "r8" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _ret __asm__ ("r3"); \ + register long _num __asm__ ("r0") = (num); \ + register long _arg1 __asm__ ("r3") = (long)(arg1); \ + register long _arg2 __asm__ ("r4") = (long)(arg2); \ + register long _arg3 __asm__ ("r5") = (long)(arg3); \ + register long _arg4 __asm__ ("r6") = (long)(arg4); \ + register long _arg5 __asm__ ("r7") = (long)(arg5); \ + register long _arg6 __asm__ ("r8") = (long)(arg6); \ + \ + __asm__ volatile ( \ + " sc\n" \ + " bns+ 1f\n" \ + " neg %0, %0\n" \ + "1:\n" \ + : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \ + "+r"(_arg4), "+r"(_arg5), "+r"(_arg6) \ + : "0"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _ret; \ +}) + +#ifndef __powerpc64__ +/* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0), + * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but + * works with __attribute__((__optimize__("-fno-stack-protector"))) + */ +#ifdef __no_stack_protector +#undef __no_stack_protector +#define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) +#endif +#endif /* !__powerpc64__ */ + +/* startup code */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ +#ifdef __powerpc64__ +#if _CALL_ELF == 2 + /* with -mabi=elfv2, save TOC/GOT pointer to r2 + * r12 is global entry pointer, we use it to compute TOC from r12 + * https://www.llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf + * https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.pdf + */ + __asm__ volatile ( + "addis 2, 12, .TOC. - _start@ha\n" + "addi 2, 2, .TOC. - _start@l\n" + ); +#endif /* _CALL_ELF == 2 */ + + __asm__ volatile ( + "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ + "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ + "li 0, 0\n" /* zero the frame pointer */ + "stdu 1, -32(1)\n" /* the initial stack frame */ + "bl _start_c\n" /* transfer to c runtime */ + ); +#else + __asm__ volatile ( + "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ + "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ + "li 0, 0\n" /* zero the frame pointer */ + "stwu 1, -16(1)\n" /* the initial stack frame */ + "bl _start_c\n" /* transfer to c runtime */ + ); +#endif + __builtin_unreachable(); +} + +#endif /* _NOLIBC_ARCH_POWERPC_H */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h new file mode 100644 index 000000000000..1927c643c739 --- /dev/null +++ b/tools/include/nolibc/arch-riscv.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * RISCV (32 and 64) specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_RISCV_H +#define _NOLIBC_ARCH_RISCV_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for RISCV : + * - stack is 16-byte aligned + * - syscall number is passed in a7 + * - arguments are in a0, a1, a2, a3, a4, a5 + * - the system call is performed by calling ecall + * - syscall return comes in a0 + * - the arguments are cast to long and assigned into the target + * registers which are then simply passed as registers to the asm code, + * so that we don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0"); \ + \ + __asm__ volatile ( \ + "ecall\n\t" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "ecall\n\t" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 __asm__ ("a4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("a7") = (num); \ + register long _arg1 __asm__ ("a0") = (long)(arg1); \ + register long _arg2 __asm__ ("a1") = (long)(arg2); \ + register long _arg3 __asm__ ("a2") = (long)(arg3); \ + register long _arg4 __asm__ ("a3") = (long)(arg4); \ + register long _arg5 __asm__ ("a4") = (long)(arg5); \ + register long _arg6 __asm__ ("a5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "ecall\n" \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + ".option push\n" + ".option norelax\n" + "lla gp, __global_pointer$\n" + ".option pop\n" + "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ + "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ + "call _start_c\n" /* transfer to c runtime */ + ); + __builtin_unreachable(); +} + +#endif /* _NOLIBC_ARCH_RISCV_H */ diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h new file mode 100644 index 000000000000..5d60fd43f883 --- /dev/null +++ b/tools/include/nolibc/arch-s390.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * s390 specific definitions for NOLIBC + */ + +#ifndef _NOLIBC_ARCH_S390_H +#define _NOLIBC_ARCH_S390_H +#include <asm/signal.h> +#include <asm/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for s390: + * - registers are 64-bit + * - syscall number is passed in r1 + * - arguments are in r2-r7 + * - the system call is performed by calling the svc instruction + * - syscall return value is in r2 + * - r1 and r2 are clobbered, others are preserved. + * + * Link s390 ABI: https://github.com/IBM/s390x-abi + * + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _rc __asm__ ("2"); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "=d"(_rc) \ + : "d"(_num) \ + : "memory", "cc" \ + ); \ + _rc; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _arg1 __asm__ ("2") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "+d"(_arg1) \ + : "d"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _arg1 __asm__ ("2") = (long)(arg1); \ + register long _arg2 __asm__ ("3") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "+d"(_arg1) \ + : "d"(_arg2), "d"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _arg1 __asm__ ("2") = (long)(arg1); \ + register long _arg2 __asm__ ("3") = (long)(arg2); \ + register long _arg3 __asm__ ("4") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "+d"(_arg1) \ + : "d"(_arg2), "d"(_arg3), "d"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _arg1 __asm__ ("2") = (long)(arg1); \ + register long _arg2 __asm__ ("3") = (long)(arg2); \ + register long _arg3 __asm__ ("4") = (long)(arg3); \ + register long _arg4 __asm__ ("5") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "+d"(_arg1) \ + : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _arg1 __asm__ ("2") = (long)(arg1); \ + register long _arg2 __asm__ ("3") = (long)(arg2); \ + register long _arg3 __asm__ ("4") = (long)(arg3); \ + register long _arg4 __asm__ ("5") = (long)(arg4); \ + register long _arg5 __asm__ ("6") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "+d"(_arg1) \ + : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5), \ + "d"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("1") = (num); \ + register long _arg1 __asm__ ("2") = (long)(arg1); \ + register long _arg2 __asm__ ("3") = (long)(arg2); \ + register long _arg3 __asm__ ("4") = (long)(arg3); \ + register long _arg4 __asm__ ("5") = (long)(arg4); \ + register long _arg5 __asm__ ("6") = (long)(arg5); \ + register long _arg6 __asm__ ("7") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "svc 0\n" \ + : "+d"(_arg1) \ + : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5), \ + "d"(_arg6), "d"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */ + "aghi %r15, -160\n" /* allocate new stackframe */ + "xc 0(8,%r15), 0(%r15)\n" /* clear backchain */ + "brasl %r14, _start_c\n" /* transfer to c runtime */ + ); + __builtin_unreachable(); +} + +struct s390_mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + struct s390_mmap_arg_struct args = { + .addr = (unsigned long)addr, + .len = (unsigned long)length, + .prot = prot, + .flags = flags, + .fd = fd, + .offset = (unsigned long)offset + }; + + return (void *)my_syscall1(__NR_mmap, &args); +} +#define sys_mmap sys_mmap + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + return my_syscall5(__NR_clone, 0, SIGCHLD, 0, 0, 0); +} +#define sys_fork sys_fork + +#endif /* _NOLIBC_ARCH_S390_H */ diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h new file mode 100644 index 000000000000..68609f421934 --- /dev/null +++ b/tools/include/nolibc/arch-x86_64.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * x86_64 specific definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ARCH_X86_64_H +#define _NOLIBC_ARCH_X86_64_H + +#include "compiler.h" +#include "crt.h" + +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r11 are clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 + * Calling Conventions. + * + * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home + * + */ + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + register long _arg4 __asm__ ("r10") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + register long _arg4 __asm__ ("r10") = (long)(arg4); \ + register long _arg5 __asm__ ("r8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _ret; \ + register long _num __asm__ ("rax") = (num); \ + register long _arg1 __asm__ ("rdi") = (long)(arg1); \ + register long _arg2 __asm__ ("rsi") = (long)(arg2); \ + register long _arg3 __asm__ ("rdx") = (long)(arg3); \ + register long _arg4 __asm__ ("r10") = (long)(arg4); \ + register long _arg5 __asm__ ("r8") = (long)(arg5); \ + register long _arg6 __asm__ ("r9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ +void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ + "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __builtin_unreachable(); +} + +#define NOLIBC_ARCH_HAS_MEMMOVE +void *memmove(void *dst, const void *src, size_t len); + +#define NOLIBC_ARCH_HAS_MEMCPY +void *memcpy(void *dst, const void *src, size_t len); + +#define NOLIBC_ARCH_HAS_MEMSET +void *memset(void *dst, int c, size_t len); + +__asm__ ( +".section .text.nolibc_memmove_memcpy\n" +".weak memmove\n" +".weak memcpy\n" +"memmove:\n" +"memcpy:\n" + "movq %rdx, %rcx\n\t" + "movq %rdi, %rax\n\t" + "movq %rdi, %rdx\n\t" + "subq %rsi, %rdx\n\t" + "cmpq %rcx, %rdx\n\t" + "jb .Lbackward_copy\n\t" + "rep movsb\n\t" + "retq\n" +".Lbackward_copy:" + "leaq -1(%rdi, %rcx, 1), %rdi\n\t" + "leaq -1(%rsi, %rcx, 1), %rsi\n\t" + "std\n\t" + "rep movsb\n\t" + "cld\n\t" + "retq\n" + +".section .text.nolibc_memset\n" +".weak memset\n" +"memset:\n" + "xchgl %eax, %esi\n\t" + "movq %rdx, %rcx\n\t" + "pushq %rdi\n\t" + "rep stosb\n\t" + "popq %rax\n\t" + "retq\n" +); + +#endif /* _NOLIBC_ARCH_X86_64_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h new file mode 100644 index 000000000000..c8f4e5d3add9 --- /dev/null +++ b/tools/include/nolibc/arch.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +/* Below comes the architecture-specific code. For each architecture, we have + * the syscall declarations and the _start code definition. This is the only + * global part. On all architectures the kernel puts everything in the stack + * before jumping to _start just above us, without any return address (_start + * is not a function but an entry point). So at the stack pointer we find argc. + * Then argv[] begins, and ends at the first NULL. Then we have envp which + * starts and ends with a NULL as well. So envp=argv+argc+1. + */ + +#ifndef _NOLIBC_ARCH_H +#define _NOLIBC_ARCH_H + +#if defined(__x86_64__) +#include "arch-x86_64.h" +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#include "arch-i386.h" +#elif defined(__ARM_EABI__) +#include "arch-arm.h" +#elif defined(__aarch64__) +#include "arch-aarch64.h" +#elif defined(__mips__) +#include "arch-mips.h" +#elif defined(__powerpc__) +#include "arch-powerpc.h" +#elif defined(__riscv) +#include "arch-riscv.h" +#elif defined(__s390x__) +#include "arch-s390.h" +#elif defined(__loongarch__) +#include "arch-loongarch.h" +#else +#error Unsupported Architecture +#endif + +#endif /* _NOLIBC_ARCH_H */ diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h new file mode 100644 index 000000000000..beddc3665d69 --- /dev/null +++ b/tools/include/nolibc/compiler.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * NOLIBC compiler support header + * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net> + */ +#ifndef _NOLIBC_COMPILER_H +#define _NOLIBC_COMPILER_H + +#if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__) + +#define _NOLIBC_STACKPROTECTOR + +#endif /* defined(__SSP__) ... */ + +#if defined(__has_attribute) +# if __has_attribute(no_stack_protector) +# define __no_stack_protector __attribute__((no_stack_protector)) +# else +# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) +# endif +#else +# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) +#endif /* defined(__has_attribute) */ + +#endif /* _NOLIBC_COMPILER_H */ diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h new file mode 100644 index 000000000000..43b551468c2a --- /dev/null +++ b/tools/include/nolibc/crt.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * C Run Time support for NOLIBC + * Copyright (C) 2023 Zhangjin Wu <falcon@tinylab.org> + */ + +#ifndef _NOLIBC_CRT_H +#define _NOLIBC_CRT_H + +char **environ __attribute__((weak)); +const unsigned long *_auxv __attribute__((weak)); + +static void __stack_chk_init(void); +static void exit(int); + +extern void (*const __preinit_array_start[])(void) __attribute__((weak)); +extern void (*const __preinit_array_end[])(void) __attribute__((weak)); + +extern void (*const __init_array_start[])(void) __attribute__((weak)); +extern void (*const __init_array_end[])(void) __attribute__((weak)); + +extern void (*const __fini_array_start[])(void) __attribute__((weak)); +extern void (*const __fini_array_end[])(void) __attribute__((weak)); + +__attribute__((weak)) +void _start_c(long *sp) +{ + long argc; + char **argv; + char **envp; + int exitcode; + void (* const *func)(void); + const unsigned long *auxv; + /* silence potential warning: conflicting types for 'main' */ + int _nolibc_main(int, char **, char **) __asm__ ("main"); + + /* initialize stack protector */ + __stack_chk_init(); + + /* + * sp : argc <-- argument count, required by main() + * argv: argv[0] <-- argument vector, required by main() + * argv[1] + * ... + * argv[argc-1] + * null + * environ: environ[0] <-- environment variables, required by main() and getenv() + * environ[1] + * ... + * null + * _auxv: _auxv[0] <-- auxiliary vector, required by getauxval() + * _auxv[1] + * ... + * null + */ + + /* assign argc and argv */ + argc = *sp; + argv = (void *)(sp + 1); + + /* find environ */ + environ = envp = argv + argc + 1; + + /* find _auxv */ + for (auxv = (void *)envp; *auxv++;) + ; + _auxv = auxv; + + for (func = __preinit_array_start; func < __preinit_array_end; func++) + (*func)(); + for (func = __init_array_start; func < __init_array_end; func++) + (*func)(); + + /* go to application */ + exitcode = _nolibc_main(argc, argv, envp); + + for (func = __fini_array_end; func > __fini_array_start;) + (*--func)(); + + exit(exitcode); +} + +#endif /* _NOLIBC_CRT_H */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h new file mode 100644 index 000000000000..6f90706d0644 --- /dev/null +++ b/tools/include/nolibc/ctype.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * ctype function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_CTYPE_H +#define _NOLIBC_CTYPE_H + +#include "std.h" + +/* + * As much as possible, please keep functions alphabetically sorted. + */ + +static __attribute__((unused)) +int isascii(int c) +{ + /* 0x00..0x7f */ + return (unsigned int)c <= 0x7f; +} + +static __attribute__((unused)) +int isblank(int c) +{ + return c == '\t' || c == ' '; +} + +static __attribute__((unused)) +int iscntrl(int c) +{ + /* 0x00..0x1f, 0x7f */ + return (unsigned int)c < 0x20 || c == 0x7f; +} + +static __attribute__((unused)) +int isdigit(int c) +{ + return (unsigned int)(c - '0') < 10; +} + +static __attribute__((unused)) +int isgraph(int c) +{ + /* 0x21..0x7e */ + return (unsigned int)(c - 0x21) < 0x5e; +} + +static __attribute__((unused)) +int islower(int c) +{ + return (unsigned int)(c - 'a') < 26; +} + +static __attribute__((unused)) +int isprint(int c) +{ + /* 0x20..0x7e */ + return (unsigned int)(c - 0x20) < 0x5f; +} + +static __attribute__((unused)) +int isspace(int c) +{ + /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */ + return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5; +} + +static __attribute__((unused)) +int isupper(int c) +{ + return (unsigned int)(c - 'A') < 26; +} + +static __attribute__((unused)) +int isxdigit(int c) +{ + return isdigit(c) || (unsigned int)(c - 'A') < 6 || (unsigned int)(c - 'a') < 6; +} + +static __attribute__((unused)) +int isalpha(int c) +{ + return islower(c) || isupper(c); +} + +static __attribute__((unused)) +int isalnum(int c) +{ + return isalpha(c) || isdigit(c); +} + +static __attribute__((unused)) +int ispunct(int c) +{ + return isgraph(c) && !isalnum(c); +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h new file mode 100644 index 000000000000..a44486ff0477 --- /dev/null +++ b/tools/include/nolibc/errno.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Minimal errno definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_ERRNO_H +#define _NOLIBC_ERRNO_H + +#include <asm/errno.h> + +#ifndef NOLIBC_IGNORE_ERRNO +#define SET_ERRNO(v) do { errno = (v); } while (0) +int errno __attribute__((weak)); +#else +#define SET_ERRNO(v) do { } while (0) +#endif + + +/* errno codes all ensure that they will not conflict with a valid pointer + * because they all correspond to the highest addressable memory page. + */ +#define MAX_ERRNO 4095 + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 2551e9b71167..989e707263a4 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -13,11 +13,10 @@ * Syscalls are split into 3 levels: * - The lower level is the arch-specific syscall() definition, consisting in * assembly code in compound expressions. These are called my_syscall0() to - * my_syscall6() depending on the number of arguments. The MIPS - * implementation is limited to 5 arguments. All input arguments are cast - * to a long stored in a register. These expressions always return the - * syscall's return value as a signed long value which is often either a - * pointer or the negated errno value. + * my_syscall6() depending on the number of arguments. All input arguments + * are castto a long stored in a register. These expressions always return + * the syscall's return value as a signed long value which is often either + * a pointer or the negated errno value. * * - The second level is mostly architecture-independent. It is made of * static functions called sys_<name>() which rely on my_syscallN() @@ -57,21 +56,31 @@ * having to specify anything. * * Finally some very common libc-level functions are provided. It is the case - * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing - * is currently provided regarding stdio emulation. + * for a few functions usually found in string.h, ctype.h, or stdlib.h. * - * The macro NOLIBC is always defined, so that it is possible for a program to - * check this macro to know if it is being built against and decide to disable - * some features or simply not to include some standard libc files. - * - * Ideally this file should be split in multiple files for easier long term - * maintenance, but provided as a single file as it is now, it's quite - * convenient to use. Maybe some variations involving a set of includes at the - * top could work. + * The nolibc.h file is only a convenient entry point which includes all other + * files. It also defines the NOLIBC macro, so that it is possible for a + * program to check this macro to know if it is being built against and decide + * to disable some features or simply not to include some standard libc files. * * A simple static executable may be built this way : * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ - * -static -include nolibc.h -lgcc -o hello hello.c + * -static -include nolibc.h -o hello hello.c -lgcc + * + * Simple programs meant to be reasonably portable to various libc and using + * only a few common includes, may also be built by simply making the include + * path point to the nolibc directory: + * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + * -I../nolibc -o hello hello.c -lgcc + * + * The available standard (but limited) include files are: + * ctype.h, errno.h, signal.h, stdarg.h, stdio.h, stdlib.h, string.h, time.h + * + * In addition, the following ones are expected to be provided by the compiler: + * float.h, stddef.h + * + * The following ones which are part to the C standard are not provided: + * assert.h, locale.h, math.h, setjmp.h, limits.h * * A very useful calling convention table may be found here : * http://man7.org/linux/man-pages/man2/syscall.2.html @@ -80,2378 +89,23 @@ * https://w3challs.com/syscalls/ * */ +#ifndef _NOLIBC_H +#define _NOLIBC_H -/* Some archs (at least aarch64) don't expose the regular syscalls anymore by - * default, either because they have an "_at" replacement, or because there are - * more modern alternatives. For now we'd rather still use them. - */ -#define __ARCH_WANT_SYSCALL_NO_AT -#define __ARCH_WANT_SYSCALL_NO_FLAGS -#define __ARCH_WANT_SYSCALL_DEPRECATED - -#include <asm/unistd.h> -#include <asm/ioctls.h> -#include <asm/errno.h> -#include <linux/fs.h> -#include <linux/loop.h> +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" +#include "ctype.h" +#include "signal.h" +#include "unistd.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "time.h" +#include "stackprotector.h" +/* Used by programs to avoid std includes */ #define NOLIBC -/* this way it will be removed if unused */ -static int errno; - -#ifndef NOLIBC_IGNORE_ERRNO -#define SET_ERRNO(v) do { errno = (v); } while (0) -#else -#define SET_ERRNO(v) do { } while (0) -#endif - -/* errno codes all ensure that they will not conflict with a valid pointer - * because they all correspond to the highest addressable memry page. - */ -#define MAX_ERRNO 4095 - -/* Declare a few quite common macros and types that usually are in stdlib.h, - * stdint.h, ctype.h, unistd.h and a few other common locations. - */ - -#define NULL ((void *)0) - -/* stdint types */ -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef unsigned short uint16_t; -typedef signed short int16_t; -typedef unsigned int uint32_t; -typedef signed int int32_t; -typedef unsigned long long uint64_t; -typedef signed long long int64_t; -typedef unsigned long size_t; -typedef signed long ssize_t; -typedef unsigned long uintptr_t; -typedef signed long intptr_t; -typedef signed long ptrdiff_t; - -/* for stat() */ -typedef unsigned int dev_t; -typedef unsigned long ino_t; -typedef unsigned int mode_t; -typedef signed int pid_t; -typedef unsigned int uid_t; -typedef unsigned int gid_t; -typedef unsigned long nlink_t; -typedef signed long off_t; -typedef signed long blksize_t; -typedef signed long blkcnt_t; -typedef signed long time_t; - -/* for poll() */ -struct pollfd { - int fd; - short int events; - short int revents; -}; - -/* for select() */ -struct timeval { - long tv_sec; - long tv_usec; -}; - -/* for pselect() */ -struct timespec { - long tv_sec; - long tv_nsec; -}; - -/* for gettimeofday() */ -struct timezone { - int tz_minuteswest; - int tz_dsttime; -}; - -/* for getdents64() */ -struct linux_dirent64 { - uint64_t d_ino; - int64_t d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[]; -}; - -/* commonly an fd_set represents 256 FDs */ -#define FD_SETSIZE 256 -typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; - -/* needed by wait4() */ -struct rusage { - struct timeval ru_utime; - struct timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - -/* stat flags (WARNING, octal here) */ -#define S_IFDIR 0040000 -#define S_IFCHR 0020000 -#define S_IFBLK 0060000 -#define S_IFREG 0100000 -#define S_IFIFO 0010000 -#define S_IFLNK 0120000 -#define S_IFSOCK 0140000 -#define S_IFMT 0170000 - -#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) - -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 - -/* all the *at functions */ -#ifndef AT_FDWCD -#define AT_FDCWD -100 -#endif - -/* lseek */ -#define SEEK_SET 0 -#define SEEK_CUR 1 -#define SEEK_END 2 - -/* reboot */ -#define LINUX_REBOOT_MAGIC1 0xfee1dead -#define LINUX_REBOOT_MAGIC2 0x28121969 -#define LINUX_REBOOT_CMD_HALT 0xcdef0123 -#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc -#define LINUX_REBOOT_CMD_RESTART 0x01234567 -#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 - - -/* The format of the struct as returned by the libc to the application, which - * significantly differs from the format returned by the stat() syscall flavours. - */ -struct stat { - dev_t st_dev; /* ID of device containing file */ - ino_t st_ino; /* inode number */ - mode_t st_mode; /* protection */ - nlink_t st_nlink; /* number of hard links */ - uid_t st_uid; /* user ID of owner */ - gid_t st_gid; /* group ID of owner */ - dev_t st_rdev; /* device ID (if special file) */ - off_t st_size; /* total size, in bytes */ - blksize_t st_blksize; /* blocksize for file system I/O */ - blkcnt_t st_blocks; /* number of 512B blocks allocated */ - time_t st_atime; /* time of last access */ - time_t st_mtime; /* time of last modification */ - time_t st_ctime; /* time of last status change */ -}; - -#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) -#define WIFEXITED(status) (((status) & 0x7f) == 0) - - -/* Below comes the architecture-specific code. For each architecture, we have - * the syscall declarations and the _start code definition. This is the only - * global part. On all architectures the kernel puts everything in the stack - * before jumping to _start just above us, without any return address (_start - * is not a function but an entry pint). So at the stack pointer we find argc. - * Then argv[] begins, and ends at the first NULL. Then we have envp which - * starts and ends with a NULL as well. So envp=argv+argc+1. - */ - -#if defined(__x86_64__) -/* Syscalls for x86_64 : - * - registers are 64-bit - * - syscall number is passed in rax - * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively - * - the system call is performed by calling the syscall instruction - * - syscall return comes in rax - * - rcx and r8..r11 may be clobbered, others are preserved. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - */ - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - register long _arg4 asm("r10") = (long)(arg4); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret), "=r"(_arg4) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "rcx", "r8", "r9", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - register long _arg4 asm("r10") = (long)(arg4); \ - register long _arg5 asm("r8") = (long)(arg5); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "rcx", "r9", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _ret; \ - register long _num asm("rax") = (num); \ - register long _arg1 asm("rdi") = (long)(arg1); \ - register long _arg2 asm("rsi") = (long)(arg2); \ - register long _arg3 asm("rdx") = (long)(arg3); \ - register long _arg4 asm("r10") = (long)(arg4); \ - register long _arg5 asm("r8") = (long)(arg5); \ - register long _arg6 asm("r9") = (long)(arg6); \ - \ - asm volatile ( \ - "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - "pop %rdi\n" // argc (first arg, %rdi) - "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) - "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when - "sub $8, %rsp\n" // entering the callee - "call main\n" // main() returns the status code, we'll exit with it. - "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits - "mov $60, %rax\n" // NR_exit == 60 - "syscall\n" // really exit - "hlt\n" // ensure it does not return - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x10000 - -/* The struct returned by the stat() syscall, equivalent to stat64(). The - * syscall returns 116 bytes and stops in the middle of __unused. - */ -struct sys_stat_struct { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned int st_mode; - unsigned int st_uid; - - unsigned int st_gid; - unsigned int __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - - long st_blocks; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - long __unused[3]; -}; - -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - register long _arg3 asm("edx") = (long)(arg3); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - register long _arg3 asm("edx") = (long)(arg3); \ - register long _arg4 asm("esi") = (long)(arg4); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num asm("eax") = (num); \ - register long _arg1 asm("ebx") = (long)(arg1); \ - register long _arg2 asm("ecx") = (long)(arg2); \ - register long _arg3 asm("edx") = (long)(arg3); \ - register long _arg4 asm("esi") = (long)(arg4); \ - register long _arg5 asm("edi") = (long)(arg5); \ - \ - asm volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - "pop %eax\n" // argc (first arg, %eax) - "mov %esp, %ebx\n" // argv[] (second arg, %ebx) - "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when - "push %ecx\n" // push all registers on the stack so that we - "push %ebx\n" // support both regparm and plain stack modes - "push %eax\n" - "call main\n" // main() returns the status code in %eax - "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now - "hlt\n" // ensure it does not - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x10000 - -/* The struct returned by the stat() syscall, 32-bit only, the syscall returns - * exactly 56 bytes (stops before the unused array). - */ -struct sys_stat_struct { - unsigned long st_dev; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - - unsigned long st_rdev; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long __unused[2]; -}; - -#elif defined(__ARM_EABI__) -/* Syscalls for ARM in ARM or Thumb modes : - * - registers are 32-bit - * - stack is 8-byte aligned - * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) - * - syscall number is passed in r7 - * - arguments are in r0, r1, r2, r3, r4, r5 - * - the system call is performed by calling svc #0 - * - syscall return comes in r0. - * - only lr is clobbered. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, ARM supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0"); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - register long _arg3 asm("r2") = (long)(arg3); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - register long _arg3 asm("r2") = (long)(arg3); \ - register long _arg4 asm("r3") = (long)(arg4); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("r7") = (num); \ - register long _arg1 asm("r0") = (long)(arg1); \ - register long _arg2 asm("r1") = (long)(arg2); \ - register long _arg3 asm("r2") = (long)(arg3); \ - register long _arg4 asm("r3") = (long)(arg4); \ - register long _arg5 asm("r4") = (long)(arg5); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" -#if defined(__THUMBEB__) || defined(__THUMBEL__) - /* We enter here in 32-bit mode but if some previous functions were in - * 16-bit mode, the assembler cannot know, so we need to tell it we're in - * 32-bit now, then switch to 16-bit (is there a better way to do it than - * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that - * it generates correct instructions. Note that we do not support thumb1. - */ - ".code 32\n" - "add r0, pc, #1\n" - "bx r0\n" - ".code 16\n" -#endif - "pop {%r0}\n" // argc was in the stack - "mov %r1, %sp\n" // argv = sp - "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... - "add %r2, %r2, $4\n" // ... + 4 - "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the - "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) - "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits - "movs r7, $1\n" // NR_exit == 1 - "svc $0x00\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x4000 - -/* The struct returned by the stat() syscall, 32-bit only, the syscall returns - * exactly 56 bytes (stops before the unused array). In big endian, the format - * differs as devices are returned as short only. - */ -struct sys_stat_struct { -#if defined(__ARMEB__) - unsigned short st_dev; - unsigned short __pad1; -#else - unsigned long st_dev; -#endif - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; -#if defined(__ARMEB__) - unsigned short st_rdev; - unsigned short __pad2; -#else - unsigned long st_rdev; -#endif - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long __unused[2]; -}; - -#elif defined(__aarch64__) -/* Syscalls for AARCH64 : - * - registers are 64-bit - * - stack is 16-byte aligned - * - syscall number is passed in x8 - * - arguments are in x0, x1, x2, x3, x4, x5 - * - the system call is performed by calling svc 0 - * - syscall return comes in x0. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - * On aarch64, select() is not implemented so we have to use pselect6(). - */ -#define __ARCH_WANT_SYS_PSELECT6 - -#define my_syscall0(num) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0"); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - register long _arg4 asm("x3") = (long)(arg4); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - register long _arg4 asm("x3") = (long)(arg4); \ - register long _arg5 asm("x4") = (long)(arg5); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num asm("x8") = (num); \ - register long _arg1 asm("x0") = (long)(arg1); \ - register long _arg2 asm("x1") = (long)(arg2); \ - register long _arg3 asm("x2") = (long)(arg3); \ - register long _arg4 asm("x3") = (long)(arg4); \ - register long _arg5 asm("x4") = (long)(arg5); \ - register long _arg6 asm("x5") = (long)(arg6); \ - \ - asm volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - "ldr x0, [sp]\n" // argc (x0) was in the stack - "add x1, sp, 8\n" // argv (x1) = sp - "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... - "add x2, x2, 8\n" // + 8 (skip null) - "add x2, x2, x1\n" // + argv - "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee - "bl main\n" // main() returns the status code, we'll exit with it. - "and x0, x0, 0xff\n" // limit exit code to 8 bits - "mov x8, 93\n" // NR_exit == 93 - "svc #0\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x40 -#define O_EXCL 0x80 -#define O_NOCTTY 0x100 -#define O_TRUNC 0x200 -#define O_APPEND 0x400 -#define O_NONBLOCK 0x800 -#define O_DIRECTORY 0x4000 - -/* The struct returned by the newfstatat() syscall. Differs slightly from the - * x86_64's stat one by field ordering, so be careful. - */ -struct sys_stat_struct { - unsigned long st_dev; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - - unsigned long st_rdev; - unsigned long __pad1; - long st_size; - int st_blksize; - int __pad2; - - long st_blocks; - long st_atime; - unsigned long st_atime_nsec; - long st_mtime; - - unsigned long st_mtime_nsec; - long st_ctime; - unsigned long st_ctime_nsec; - unsigned int __unused[2]; -}; - -#elif defined(__mips__) && defined(_ABIO32) -/* Syscalls for MIPS ABI O32 : - * - WARNING! there's always a delayed slot! - * - WARNING again, the syntax is different, registers take a '$' and numbers - * do not. - * - registers are 32-bit - * - stack is 8-byte aligned - * - syscall number is passed in v0 (starts at 0xfa0). - * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to - * leave some room in the stack for the callee to save a0..a3 if needed. - * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are - * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as - * scall32-o32.S in the kernel sources. - * - the system call is performed by calling "syscall" - * - syscall return comes in v0, and register a3 needs to be checked to know - * if an error occured, in which case errno is in v0. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - */ - -#define my_syscall0(num) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "r"(_num) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3"); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("v0") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - register long _arg5 = (long)(arg5); \ - \ - asm volatile ( \ - "addiu $sp, $sp, -32\n" \ - "sw %7, 16($sp)\n" \ - "syscall\n " \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ - : "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ - ); \ - _arg4 ? -_num : _num; \ -}) - -/* startup code, note that it's called __start on MIPS */ -asm(".section .text\n" - ".set nomips16\n" - ".global __start\n" - ".set noreorder\n" - ".option pic0\n" - ".ent __start\n" - "__start:\n" - "lw $a0,($sp)\n" // argc was in the stack - "addiu $a1, $sp, 4\n" // argv = sp + 4 - "sll $a2, $a0, 2\n" // a2 = argc * 4 - "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... - "addiu $a2, $a2, 4\n" // ... + 4 - "li $t0, -8\n" - "and $sp, $sp, $t0\n" // sp must be 8-byte aligned - "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! - "jal main\n" // main() returns the status code, we'll exit with it. - "nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits - "li $v0, 4001\n" // NR_exit == 4001 - "syscall\n" - ".end __start\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_APPEND 0x0008 -#define O_NONBLOCK 0x0080 -#define O_CREAT 0x0100 -#define O_TRUNC 0x0200 -#define O_EXCL 0x0400 -#define O_NOCTTY 0x0800 -#define O_DIRECTORY 0x10000 - -/* The struct returned by the stat() syscall. 88 bytes are returned by the - * syscall. - */ -struct sys_stat_struct { - unsigned int st_dev; - long st_pad1[3]; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned int st_rdev; - long st_pad2[2]; - long st_size; - long st_pad3; - long st_atime; - long st_atime_nsec; - long st_mtime; - long st_mtime_nsec; - long st_ctime; - long st_ctime_nsec; - long st_blksize; - long st_blocks; - long st_pad4[14]; -}; - -#elif defined(__riscv) - -#if __riscv_xlen == 64 -#define PTRLOG "3" -#define SZREG "8" -#elif __riscv_xlen == 32 -#define PTRLOG "2" -#define SZREG "4" -#endif - -/* Syscalls for RISCV : - * - stack is 16-byte aligned - * - syscall number is passed in a7 - * - arguments are in a0, a1, a2, a3, a4, a5 - * - the system call is performed by calling ecall - * - syscall return comes in a0 - * - the arguments are cast to long and assigned into the target - * registers which are then simply passed as registers to the asm code, - * so that we don't have to experience issues with register constraints. - */ - -#define my_syscall0(num) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0"); \ - \ - asm volatile ( \ - "ecall\n\t" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - \ - asm volatile ( \ - "ecall\n\t" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - register long _arg5 asm("a4") = (long)(arg5); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num asm("a7") = (num); \ - register long _arg1 asm("a0") = (long)(arg1); \ - register long _arg2 asm("a1") = (long)(arg2); \ - register long _arg3 asm("a2") = (long)(arg3); \ - register long _arg4 asm("a3") = (long)(arg4); \ - register long _arg5 asm("a4") = (long)(arg5); \ - register long _arg6 asm("a5") = (long)(arg6); \ - \ - asm volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -asm(".section .text\n" - ".global _start\n" - "_start:\n" - ".option push\n" - ".option norelax\n" - "lla gp, __global_pointer$\n" - ".option pop\n" - "ld a0, 0(sp)\n" // argc (a0) was in the stack - "add a1, sp, "SZREG"\n" // argv (a1) = sp - "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... - "add a2, a2, "SZREG"\n" // + SZREG (skip null) - "add a2,a2,a1\n" // + argv - "andi sp,a1,-16\n" // sp must be 16-byte aligned - "call main\n" // main() returns the status code, we'll exit with it. - "andi a0, a0, 0xff\n" // limit exit code to 8 bits - "li a7, 93\n" // NR_exit == 93 - "ecall\n" - ""); - -/* fcntl / open */ -#define O_RDONLY 0 -#define O_WRONLY 1 -#define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 - -struct sys_stat_struct { - unsigned long st_dev; /* Device. */ - unsigned long st_ino; /* File serial number. */ - unsigned int st_mode; /* File mode. */ - unsigned int st_nlink; /* Link count. */ - unsigned int st_uid; /* User ID of the file's owner. */ - unsigned int st_gid; /* Group ID of the file's group. */ - unsigned long st_rdev; /* Device number, if device. */ - unsigned long __pad1; - long st_size; /* Size of file, in bytes. */ - int st_blksize; /* Optimal block size for I/O. */ - int __pad2; - long st_blocks; /* Number 512-byte blocks allocated. */ - long st_atime; /* Time of last access. */ - unsigned long st_atime_nsec; - long st_mtime; /* Time of last modification. */ - unsigned long st_mtime_nsec; - long st_ctime; /* Time of last status change. */ - unsigned long st_ctime_nsec; - unsigned int __unused4; - unsigned int __unused5; -}; - -#endif - - -/* Below are the C functions used to declare the raw syscalls. They try to be - * architecture-agnostic, and return either a success or -errno. Declaring them - * static will lead to them being inlined in most cases, but it's still possible - * to reference them by a pointer if needed. - */ -static __attribute__((unused)) -void *sys_brk(void *addr) -{ - return (void *)my_syscall1(__NR_brk, addr); -} - -static __attribute__((noreturn,unused)) -void sys_exit(int status) -{ - my_syscall1(__NR_exit, status & 255); - while(1); // shut the "noreturn" warnings. -} - -static __attribute__((unused)) -int sys_chdir(const char *path) -{ - return my_syscall1(__NR_chdir, path); -} - -static __attribute__((unused)) -int sys_chmod(const char *path, mode_t mode) -{ -#ifdef __NR_fchmodat - return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#else - return my_syscall2(__NR_chmod, path, mode); -#endif -} - -static __attribute__((unused)) -int sys_chown(const char *path, uid_t owner, gid_t group) -{ -#ifdef __NR_fchownat - return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#else - return my_syscall3(__NR_chown, path, owner, group); -#endif -} - -static __attribute__((unused)) -int sys_chroot(const char *path) -{ - return my_syscall1(__NR_chroot, path); -} - -static __attribute__((unused)) -int sys_close(int fd) -{ - return my_syscall1(__NR_close, fd); -} - -static __attribute__((unused)) -int sys_dup(int fd) -{ - return my_syscall1(__NR_dup, fd); -} - -static __attribute__((unused)) -int sys_dup2(int old, int new) -{ - return my_syscall2(__NR_dup2, old, new); -} - -static __attribute__((unused)) -int sys_execve(const char *filename, char *const argv[], char *const envp[]) -{ - return my_syscall3(__NR_execve, filename, argv, envp); -} - -static __attribute__((unused)) -pid_t sys_fork(void) -{ - return my_syscall0(__NR_fork); -} - -static __attribute__((unused)) -int sys_fsync(int fd) -{ - return my_syscall1(__NR_fsync, fd); -} - -static __attribute__((unused)) -int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) -{ - return my_syscall3(__NR_getdents64, fd, dirp, count); -} - -static __attribute__((unused)) -pid_t sys_getpgrp(void) -{ - return my_syscall0(__NR_getpgrp); -} - -static __attribute__((unused)) -pid_t sys_getpid(void) -{ - return my_syscall0(__NR_getpid); -} - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return my_syscall2(__NR_gettimeofday, tv, tz); -} - -static __attribute__((unused)) -int sys_ioctl(int fd, unsigned long req, void *value) -{ - return my_syscall3(__NR_ioctl, fd, req, value); -} - -static __attribute__((unused)) -int sys_kill(pid_t pid, int signal) -{ - return my_syscall2(__NR_kill, pid, signal); -} - -static __attribute__((unused)) -int sys_link(const char *old, const char *new) -{ -#ifdef __NR_linkat - return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#else - return my_syscall2(__NR_link, old, new); -#endif -} - -static __attribute__((unused)) -off_t sys_lseek(int fd, off_t offset, int whence) -{ - return my_syscall3(__NR_lseek, fd, offset, whence); -} - -static __attribute__((unused)) -int sys_mkdir(const char *path, mode_t mode) -{ -#ifdef __NR_mkdirat - return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#else - return my_syscall2(__NR_mkdir, path, mode); -#endif -} - -static __attribute__((unused)) -long sys_mknod(const char *path, mode_t mode, dev_t dev) -{ -#ifdef __NR_mknodat - return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#else - return my_syscall3(__NR_mknod, path, mode, dev); -#endif -} - -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ -#ifdef __NR_openat - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#else - return my_syscall3(__NR_open, path, flags, mode); -#endif -} - -static __attribute__((unused)) -int sys_pivot_root(const char *new, const char *old) -{ - return my_syscall2(__NR_pivot_root, new, old); -} - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ - return my_syscall3(__NR_poll, fds, nfds, timeout); -} - -static __attribute__((unused)) -ssize_t sys_read(int fd, void *buf, size_t count) -{ - return my_syscall3(__NR_read, fd, buf, count); -} - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int sys_sched_yield(void) -{ - return my_syscall0(__NR_sched_yield); -} - -static __attribute__((unused)) -int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ -#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) - struct sel_arg_struct { - unsigned long n; - fd_set *r, *w, *e; - struct timeval *t; - } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; - return my_syscall1(__NR_select, &arg); -#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) - struct timespec t; - - if (timeout) { - t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; - } - return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#else -#ifndef __NR__newselect -#define __NR__newselect __NR_select -#endif - return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); -#endif -} - -static __attribute__((unused)) -int sys_setpgid(pid_t pid, pid_t pgid) -{ - return my_syscall2(__NR_setpgid, pid, pgid); -} - -static __attribute__((unused)) -pid_t sys_setsid(void) -{ - return my_syscall0(__NR_setsid); -} - -static __attribute__((unused)) -int sys_stat(const char *path, struct stat *buf) -{ - struct sys_stat_struct stat; - long ret; - -#ifdef __NR_newfstatat - /* only solution for arm64 */ - ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); -#else - ret = my_syscall2(__NR_stat, path, &stat); -#endif - buf->st_dev = stat.st_dev; - buf->st_ino = stat.st_ino; - buf->st_mode = stat.st_mode; - buf->st_nlink = stat.st_nlink; - buf->st_uid = stat.st_uid; - buf->st_gid = stat.st_gid; - buf->st_rdev = stat.st_rdev; - buf->st_size = stat.st_size; - buf->st_blksize = stat.st_blksize; - buf->st_blocks = stat.st_blocks; - buf->st_atime = stat.st_atime; - buf->st_mtime = stat.st_mtime; - buf->st_ctime = stat.st_ctime; - return ret; -} - - -static __attribute__((unused)) -int sys_symlink(const char *old, const char *new) -{ -#ifdef __NR_symlinkat - return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#else - return my_syscall2(__NR_symlink, old, new); -#endif -} - -static __attribute__((unused)) -mode_t sys_umask(mode_t mode) -{ - return my_syscall1(__NR_umask, mode); -} - -static __attribute__((unused)) -int sys_umount2(const char *path, int flags) -{ - return my_syscall2(__NR_umount2, path, flags); -} - -static __attribute__((unused)) -int sys_unlink(const char *path) -{ -#ifdef __NR_unlinkat - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#else - return my_syscall1(__NR_unlink, path); -#endif -} - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return my_syscall4(__NR_wait4, pid, status, options, rusage); -} - -static __attribute__((unused)) -pid_t sys_waitpid(pid_t pid, int *status, int options) -{ - return sys_wait4(pid, status, options, 0); -} - -static __attribute__((unused)) -pid_t sys_wait(int *status) -{ - return sys_waitpid(-1, status, 0); -} - -static __attribute__((unused)) -ssize_t sys_write(int fd, const void *buf, size_t count) -{ - return my_syscall3(__NR_write, fd, buf, count); -} - - -/* Below are the libc-compatible syscalls which return x or -1 and set errno. - * They rely on the functions above. Similarly they're marked static so that it - * is possible to assign pointers to them if needed. - */ - -static __attribute__((unused)) -int brk(void *addr) -{ - void *ret = sys_brk(addr); - - if (!ret) { - SET_ERRNO(ENOMEM); - return -1; - } - return 0; -} - -static __attribute__((noreturn,unused)) -void exit(int status) -{ - sys_exit(status); -} - -static __attribute__((unused)) -int chdir(const char *path) -{ - int ret = sys_chdir(path); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int chmod(const char *path, mode_t mode) -{ - int ret = sys_chmod(path, mode); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int chown(const char *path, uid_t owner, gid_t group) -{ - int ret = sys_chown(path, owner, group); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int chroot(const char *path) -{ - int ret = sys_chroot(path); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int close(int fd) -{ - int ret = sys_close(fd); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int dup2(int old, int new) -{ - int ret = sys_dup2(old, new); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int execve(const char *filename, char *const argv[], char *const envp[]) -{ - int ret = sys_execve(filename, argv, envp); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t fork(void) -{ - pid_t ret = sys_fork(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int fsync(int fd) -{ - int ret = sys_fsync(fd); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int getdents64(int fd, struct linux_dirent64 *dirp, int count) -{ - int ret = sys_getdents64(fd, dirp, count); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t getpgrp(void) -{ - pid_t ret = sys_getpgrp(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t getpid(void) -{ - pid_t ret = sys_getpid(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - int ret = sys_gettimeofday(tv, tz); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int ioctl(int fd, unsigned long req, void *value) -{ - int ret = sys_ioctl(fd, req, value); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int kill(pid_t pid, int signal) -{ - int ret = sys_kill(pid, signal); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int link(const char *old, const char *new) -{ - int ret = sys_link(old, new); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -off_t lseek(int fd, off_t offset, int whence) -{ - off_t ret = sys_lseek(fd, offset, whence); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int mkdir(const char *path, mode_t mode) -{ - int ret = sys_mkdir(path, mode); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int mknod(const char *path, mode_t mode, dev_t dev) -{ - int ret = sys_mknod(path, mode, dev); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - int ret = sys_mount(src, tgt, fst, flags, data); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int open(const char *path, int flags, mode_t mode) -{ - int ret = sys_open(path, flags, mode); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int pivot_root(const char *new, const char *old) -{ - int ret = sys_pivot_root(new, old); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - int ret = sys_poll(fds, nfds, timeout); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -ssize_t read(int fd, void *buf, size_t count) -{ - ssize_t ret = sys_read(fd, buf, count); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -void *sbrk(intptr_t inc) -{ - void *ret; - - /* first call to find current end */ - if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) - return ret + inc; - - SET_ERRNO(ENOMEM); - return (void *)-1; -} - -static __attribute__((unused)) -int sched_yield(void) -{ - int ret = sys_sched_yield(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ - int ret = sys_select(nfds, rfds, wfds, efds, timeout); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int setpgid(pid_t pid, pid_t pgid) -{ - int ret = sys_setpgid(pid, pgid); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t setsid(void) -{ - pid_t ret = sys_setsid(); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -unsigned int sleep(unsigned int seconds) -{ - struct timeval my_timeval = { seconds, 0 }; - - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) - return my_timeval.tv_sec + !!my_timeval.tv_usec; - else - return 0; -} - -static __attribute__((unused)) -int stat(const char *path, struct stat *buf) -{ - int ret = sys_stat(path, buf); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int symlink(const char *old, const char *new) -{ - int ret = sys_symlink(old, new); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int tcsetpgrp(int fd, pid_t pid) -{ - return ioctl(fd, TIOCSPGRP, &pid); -} - -static __attribute__((unused)) -mode_t umask(mode_t mode) -{ - return sys_umask(mode); -} - -static __attribute__((unused)) -int umount2(const char *path, int flags) -{ - int ret = sys_umount2(path, flags); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -int unlink(const char *path) -{ - int ret = sys_unlink(path); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - pid_t ret = sys_wait4(pid, status, options, rusage); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - pid_t ret = sys_waitpid(pid, status, options); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - pid_t ret = sys_wait(status); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -static __attribute__((unused)) -ssize_t write(int fd, const void *buf, size_t count) -{ - ssize_t ret = sys_write(fd, buf, count); - - if (ret < 0) { - SET_ERRNO(-ret); - ret = -1; - } - return ret; -} - -/* some size-optimized reimplementations of a few common str* and mem* - * functions. They're marked static, except memcpy() and raise() which are used - * by libgcc on ARM, so they are marked weak instead in order not to cause an - * error when building a program made of multiple files (not recommended). - */ - -static __attribute__((unused)) -void *memmove(void *dst, const void *src, size_t len) -{ - ssize_t pos = (dst <= src) ? -1 : (long)len; - void *ret = dst; - - while (len--) { - pos += (dst <= src) ? 1 : -1; - ((char *)dst)[pos] = ((char *)src)[pos]; - } - return ret; -} - -static __attribute__((unused)) -void *memset(void *dst, int b, size_t len) -{ - char *p = dst; - - while (len--) - *(p++) = b; - return dst; -} - -static __attribute__((unused)) -int memcmp(const void *s1, const void *s2, size_t n) -{ - size_t ofs = 0; - char c1 = 0; - - while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { - ofs++; - } - return c1; -} - -static __attribute__((unused)) -char *strcpy(char *dst, const char *src) -{ - char *ret = dst; - - while ((*dst++ = *src++)); - return ret; -} - -static __attribute__((unused)) -char *strchr(const char *s, int c) -{ - while (*s) { - if (*s == (char)c) - return (char *)s; - s++; - } - return NULL; -} - -static __attribute__((unused)) -char *strrchr(const char *s, int c) -{ - const char *ret = NULL; - - while (*s) { - if (*s == (char)c) - ret = s; - s++; - } - return (char *)ret; -} - -static __attribute__((unused)) -size_t nolibc_strlen(const char *str) -{ - size_t len; - - for (len = 0; str[len]; len++); - return len; -} - -#define strlen(str) ({ \ - __builtin_constant_p((str)) ? \ - __builtin_strlen((str)) : \ - nolibc_strlen((str)); \ -}) - -static __attribute__((unused)) -int isdigit(int c) -{ - return (unsigned int)(c - '0') <= 9; -} - -static __attribute__((unused)) -long atol(const char *s) -{ - unsigned long ret = 0; - unsigned long d; - int neg = 0; - - if (*s == '-') { - neg = 1; - s++; - } - - while (1) { - d = (*s++) - '0'; - if (d > 9) - break; - ret *= 10; - ret += d; - } - - return neg ? -ret : ret; -} - -static __attribute__((unused)) -int atoi(const char *s) -{ - return atol(s); -} - -static __attribute__((unused)) -const char *ltoa(long in) -{ - /* large enough for -9223372036854775808 */ - static char buffer[21]; - char *pos = buffer + sizeof(buffer) - 1; - int neg = in < 0; - unsigned long n = neg ? -in : in; - - *pos-- = '\0'; - do { - *pos-- = '0' + n % 10; - n /= 10; - if (pos < buffer) - return pos + 1; - } while (n); - - if (neg) - *pos-- = '-'; - return pos + 1; -} - -__attribute__((weak,unused)) -void *memcpy(void *dst, const void *src, size_t len) -{ - return memmove(dst, src, len); -} - -/* needed by libgcc for divide by zero */ -__attribute__((weak,unused)) -int raise(int signal) -{ - return kill(getpid(), signal); -} - -/* Here come a few helper functions */ - -static __attribute__((unused)) -void FD_ZERO(fd_set *set) -{ - memset(set, 0, sizeof(*set)); -} - -static __attribute__((unused)) -void FD_SET(int fd, fd_set *set) -{ - if (fd < 0 || fd >= FD_SETSIZE) - return; - set->fd32[fd / 32] |= 1 << (fd & 31); -} - -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -static __attribute__((unused)) -dev_t makedev(unsigned int major, unsigned int minor) -{ - return ((major & 0xfff) << 8) | (minor & 0xff); -} +#endif /* _NOLIBC_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h new file mode 100644 index 000000000000..137552216e46 --- /dev/null +++ b/tools/include/nolibc/signal.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * signal function definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_SIGNAL_H +#define _NOLIBC_SIGNAL_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" + +/* This one is not marked static as it's needed by libgcc for divide by zero */ +__attribute__((weak,unused,section(".text.nolibc_raise"))) +int raise(int signal) +{ + return sys_kill(sys_getpid(), signal); +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h new file mode 100644 index 000000000000..13f1d0e60387 --- /dev/null +++ b/tools/include/nolibc/stackprotector.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Stack protector support for NOLIBC + * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_STACKPROTECTOR_H +#define _NOLIBC_STACKPROTECTOR_H + +#include "compiler.h" + +#if defined(_NOLIBC_STACKPROTECTOR) + +#include "sys.h" +#include "stdlib.h" + +/* The functions in this header are using raw syscall macros to avoid + * triggering stack protector errors themselves + */ + +__attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) +void __stack_chk_fail(void) +{ + pid_t pid; + my_syscall3(__NR_write, STDERR_FILENO, "!!Stack smashing detected!!\n", 28); + pid = my_syscall0(__NR_getpid); + my_syscall2(__NR_kill, pid, SIGABRT); + for (;;); +} + +__attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) +void __stack_chk_fail_local(void) +{ + __stack_chk_fail(); +} + +__attribute__((weak,section(".data.nolibc_stack_chk"))) +uintptr_t __stack_chk_guard; + +static __no_stack_protector void __stack_chk_init(void) +{ + my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0); + /* a bit more randomness in case getrandom() fails, ensure the guard is never 0 */ + if (__stack_chk_guard != (uintptr_t) &__stack_chk_guard) + __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard; +} +#else /* !defined(_NOLIBC_STACKPROTECTOR) */ +static void __stack_chk_init(void) {} +#endif /* defined(_NOLIBC_STACKPROTECTOR) */ + +#endif /* _NOLIBC_STACKPROTECTOR_H */ diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h new file mode 100644 index 000000000000..933bc0be7e1c --- /dev/null +++ b/tools/include/nolibc/std.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Standard definitions and types for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STD_H +#define _NOLIBC_STD_H + +/* Declare a few quite common macros and types that usually are in stdlib.h, + * stdint.h, ctype.h, unistd.h and a few other common locations. Please place + * integer type definitions and generic macros here, but avoid OS-specific and + * syscall-specific stuff, as this file is expected to be included very early. + */ + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +#include "stdint.h" + +/* those are commonly provided by sys/types.h */ +typedef unsigned int dev_t; +typedef unsigned long ino_t; +typedef unsigned int mode_t; +typedef signed int pid_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long nlink_t; +typedef signed long off_t; +typedef signed long blksize_t; +typedef signed long blkcnt_t; +typedef signed long time_t; + +#endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/stdarg.h b/tools/include/nolibc/stdarg.h new file mode 100644 index 000000000000..c628b5783da6 --- /dev/null +++ b/tools/include/nolibc/stdarg.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Variadic argument support for NOLIBC + * Copyright (C) 2005-2020 Rich Felker, et al. + */ + +#ifndef _NOLIBC_STDARG_H +#define _NOLIBC_STDARG_H + +typedef __builtin_va_list va_list; +#define va_start(v, l) __builtin_va_start(v, l) +#define va_end(v) __builtin_va_end(v) +#define va_arg(v, l) __builtin_va_arg(v, l) +#define va_copy(d, s) __builtin_va_copy(d, s) + +#endif /* _NOLIBC_STDARG_H */ diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h new file mode 100644 index 000000000000..6665e272e213 --- /dev/null +++ b/tools/include/nolibc/stdint.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Standard definitions and types for NOLIBC + * Copyright (C) 2023 Vincent Dagonneau <v@vda.io> + */ + +#ifndef _NOLIBC_STDINT_H +#define _NOLIBC_STDINT_H + +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +typedef __SIZE_TYPE__ size_t; +typedef signed long ssize_t; +typedef unsigned long uintptr_t; +typedef signed long intptr_t; +typedef signed long ptrdiff_t; + +typedef int8_t int_least8_t; +typedef uint8_t uint_least8_t; +typedef int16_t int_least16_t; +typedef uint16_t uint_least16_t; +typedef int32_t int_least32_t; +typedef uint32_t uint_least32_t; +typedef int64_t int_least64_t; +typedef uint64_t uint_least64_t; + +typedef int8_t int_fast8_t; +typedef uint8_t uint_fast8_t; +typedef ssize_t int_fast16_t; +typedef size_t uint_fast16_t; +typedef ssize_t int_fast32_t; +typedef size_t uint_fast32_t; +typedef int64_t int_fast64_t; +typedef uint64_t uint_fast64_t; + +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + +/* limits of integral types */ + +#define INT8_MIN (-128) +#define INT16_MIN (-32767-1) +#define INT32_MIN (-2147483647-1) +#define INT64_MIN (-9223372036854775807LL-1) + +#define INT8_MAX (127) +#define INT16_MAX (32767) +#define INT32_MAX (2147483647) +#define INT64_MAX (9223372036854775807LL) + +#define UINT8_MAX (255) +#define UINT16_MAX (65535) +#define UINT32_MAX (4294967295U) +#define UINT64_MAX (18446744073709551615ULL) + +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST64_MIN INT64_MIN + +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MAX INT64_MAX + +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +#define SIZE_MAX ((size_t)(__LONG_MAX__) * 2 + 1) +#define INTPTR_MIN (-__LONG_MAX__ - 1) +#define INTPTR_MAX __LONG_MAX__ +#define PTRDIFF_MIN INTPTR_MIN +#define PTRDIFF_MAX INTPTR_MAX +#define UINTPTR_MAX SIZE_MAX + +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST16_MIN INTPTR_MIN +#define INT_FAST32_MIN INTPTR_MIN +#define INT_FAST64_MIN INT64_MIN + +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MAX INTPTR_MAX +#define INT_FAST32_MAX INTPTR_MAX +#define INT_FAST64_MAX INT64_MAX + +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX SIZE_MAX +#define UINT_FAST32_MAX SIZE_MAX +#define UINT_FAST64_MAX UINT64_MAX + +#ifndef INT_MIN +#define INT_MIN (-__INT_MAX__ - 1) +#endif +#ifndef INT_MAX +#define INT_MAX __INT_MAX__ +#endif + +#ifndef LONG_MIN +#define LONG_MIN (-__LONG_MAX__ - 1) +#endif +#ifndef LONG_MAX +#define LONG_MAX __LONG_MAX__ +#endif + +#endif /* _NOLIBC_STDINT_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h new file mode 100644 index 000000000000..16cd4d807251 --- /dev/null +++ b/tools/include/nolibc/stdio.h @@ -0,0 +1,382 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * minimal stdio function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STDIO_H +#define _NOLIBC_STDIO_H + +#include "std.h" +#include "arch.h" +#include "errno.h" +#include "types.h" +#include "sys.h" +#include "stdarg.h" +#include "stdlib.h" +#include "string.h" + +#ifndef EOF +#define EOF (-1) +#endif + +/* Buffering mode used by setvbuf. */ +#define _IOFBF 0 /* Fully buffered. */ +#define _IOLBF 1 /* Line buffered. */ +#define _IONBF 2 /* No buffering. */ + +/* just define FILE as a non-empty type. The value of the pointer gives + * the FD: FILE=~fd for fd>=0 or NULL for fd<0. This way positive FILE + * are immediately identified as abnormal entries (i.e. possible copies + * of valid pointers to something else). + */ +typedef struct FILE { + char dummy[1]; +} FILE; + +static __attribute__((unused)) FILE* const stdin = (FILE*)(intptr_t)~STDIN_FILENO; +static __attribute__((unused)) FILE* const stdout = (FILE*)(intptr_t)~STDOUT_FILENO; +static __attribute__((unused)) FILE* const stderr = (FILE*)(intptr_t)~STDERR_FILENO; + +/* provides a FILE* equivalent of fd. The mode is ignored. */ +static __attribute__((unused)) +FILE *fdopen(int fd, const char *mode __attribute__((unused))) +{ + if (fd < 0) { + SET_ERRNO(EBADF); + return NULL; + } + return (FILE*)(intptr_t)~fd; +} + +/* provides the fd of stream. */ +static __attribute__((unused)) +int fileno(FILE *stream) +{ + intptr_t i = (intptr_t)stream; + + if (i >= 0) { + SET_ERRNO(EBADF); + return -1; + } + return ~i; +} + +/* flush a stream. */ +static __attribute__((unused)) +int fflush(FILE *stream) +{ + intptr_t i = (intptr_t)stream; + + /* NULL is valid here. */ + if (i > 0) { + SET_ERRNO(EBADF); + return -1; + } + + /* Don't do anything, nolibc does not support buffering. */ + return 0; +} + +/* flush a stream. */ +static __attribute__((unused)) +int fclose(FILE *stream) +{ + intptr_t i = (intptr_t)stream; + + if (i >= 0) { + SET_ERRNO(EBADF); + return -1; + } + + if (close(~i)) + return EOF; + + return 0; +} + +/* getc(), fgetc(), getchar() */ + +#define getc(stream) fgetc(stream) + +static __attribute__((unused)) +int fgetc(FILE* stream) +{ + unsigned char ch; + + if (read(fileno(stream), &ch, 1) <= 0) + return EOF; + return ch; +} + +static __attribute__((unused)) +int getchar(void) +{ + return fgetc(stdin); +} + + +/* putc(), fputc(), putchar() */ + +#define putc(c, stream) fputc(c, stream) + +static __attribute__((unused)) +int fputc(int c, FILE* stream) +{ + unsigned char ch = c; + + if (write(fileno(stream), &ch, 1) <= 0) + return EOF; + return ch; +} + +static __attribute__((unused)) +int putchar(int c) +{ + return fputc(c, stdout); +} + + +/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ + +/* internal fwrite()-like function which only takes a size and returns 0 on + * success or EOF on error. It automatically retries on short writes. + */ +static __attribute__((unused)) +int _fwrite(const void *buf, size_t size, FILE *stream) +{ + ssize_t ret; + int fd = fileno(stream); + + while (size) { + ret = write(fd, buf, size); + if (ret <= 0) + return EOF; + size -= ret; + buf += ret; + } + return 0; +} + +static __attribute__((unused)) +size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream) +{ + size_t written; + + for (written = 0; written < nmemb; written++) { + if (_fwrite(s, size, stream) != 0) + break; + s += size; + } + return written; +} + +static __attribute__((unused)) +int fputs(const char *s, FILE *stream) +{ + return _fwrite(s, strlen(s), stream); +} + +static __attribute__((unused)) +int puts(const char *s) +{ + if (fputs(s, stdout) == EOF) + return EOF; + return putchar('\n'); +} + + +/* fgets() */ +static __attribute__((unused)) +char *fgets(char *s, int size, FILE *stream) +{ + int ofs; + int c; + + for (ofs = 0; ofs + 1 < size;) { + c = fgetc(stream); + if (c == EOF) + break; + s[ofs++] = c; + if (c == '\n') + break; + } + if (ofs < size) + s[ofs] = 0; + return ofs ? s : NULL; +} + + +/* minimal vfprintf(). It supports the following formats: + * - %[l*]{d,u,c,x,p} + * - %s + * - unknown modifiers are ignored. + */ +static __attribute__((unused, format(printf, 2, 0))) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + char escape, lpref, c; + unsigned long long v; + unsigned int written; + size_t len, ofs; + char tmpbuf[21]; + const char *outstr; + + written = ofs = escape = lpref = 0; + while (1) { + c = fmt[ofs++]; + + if (escape) { + /* we're in an escape sequence, ofs == 1 */ + escape = 0; + if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { + char *out = tmpbuf; + + if (c == 'p') + v = va_arg(args, unsigned long); + else if (lpref) { + if (lpref > 1) + v = va_arg(args, unsigned long long); + else + v = va_arg(args, unsigned long); + } else + v = va_arg(args, unsigned int); + + if (c == 'd') { + /* sign-extend the value */ + if (lpref == 0) + v = (long long)(int)v; + else if (lpref == 1) + v = (long long)(long)v; + } + + switch (c) { + case 'c': + out[0] = v; + out[1] = 0; + break; + case 'd': + i64toa_r(v, out); + break; + case 'u': + u64toa_r(v, out); + break; + case 'p': + *(out++) = '0'; + *(out++) = 'x'; + /* fall through */ + default: /* 'x' and 'p' above */ + u64toh_r(v, out); + break; + } + outstr = tmpbuf; + } + else if (c == 's') { + outstr = va_arg(args, char *); + if (!outstr) + outstr="(null)"; + } + else if (c == '%') { + /* queue it verbatim */ + continue; + } + else { + /* modifiers or final 0 */ + if (c == 'l') { + /* long format prefix, maintain the escape */ + lpref++; + } + escape = 1; + goto do_escape; + } + len = strlen(outstr); + goto flush_str; + } + + /* not an escape sequence */ + if (c == 0 || c == '%') { + /* flush pending data on escape or end */ + escape = 1; + lpref = 0; + outstr = fmt; + len = ofs - 1; + flush_str: + if (_fwrite(outstr, len, stream) != 0) + break; + + written += len; + do_escape: + if (c == 0) + break; + fmt += ofs; + ofs = 0; + continue; + } + + /* literal char, just queue it */ + } + return written; +} + +static __attribute__((unused, format(printf, 1, 0))) +int vprintf(const char *fmt, va_list args) +{ + return vfprintf(stdout, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int fprintf(FILE *stream, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vfprintf(stream, fmt, args); + va_end(args); + return ret; +} + +static __attribute__((unused, format(printf, 1, 2))) +int printf(const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vfprintf(stdout, fmt, args); + va_end(args); + return ret; +} + +static __attribute__((unused)) +void perror(const char *msg) +{ + fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); +} + +static __attribute__((unused)) +int setvbuf(FILE *stream __attribute__((unused)), + char *buf __attribute__((unused)), + int mode, + size_t size __attribute__((unused))) +{ + /* + * nolibc does not support buffering so this is a nop. Just check mode + * is valid as required by the spec. + */ + switch (mode) { + case _IOFBF: + case _IOLBF: + case _IONBF: + break; + default: + return EOF; + } + + return 0; +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h new file mode 100644 index 000000000000..bacfd35c5156 --- /dev/null +++ b/tools/include/nolibc/stdlib.h @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stdlib function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STDLIB_H +#define _NOLIBC_STDLIB_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" +#include "string.h" +#include <linux/auxvec.h> + +struct nolibc_heap { + size_t len; + char user_p[] __attribute__((__aligned__)); +}; + +/* Buffer used to store int-to-ASCII conversions. Will only be implemented if + * any of the related functions is implemented. The area is large enough to + * store "18446744073709551615" or "-9223372036854775808" and the final zero. + */ +static __attribute__((unused)) char itoa_buffer[21]; + +/* + * As much as possible, please keep functions alphabetically sorted. + */ + +/* must be exported, as it's used by libgcc for various divide functions */ +__attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) +void abort(void) +{ + sys_kill(sys_getpid(), SIGABRT); + for (;;); +} + +static __attribute__((unused)) +long atol(const char *s) +{ + unsigned long ret = 0; + unsigned long d; + int neg = 0; + + if (*s == '-') { + neg = 1; + s++; + } + + while (1) { + d = (*s++) - '0'; + if (d > 9) + break; + ret *= 10; + ret += d; + } + + return neg ? -ret : ret; +} + +static __attribute__((unused)) +int atoi(const char *s) +{ + return atol(s); +} + +static __attribute__((unused)) +void free(void *ptr) +{ + struct nolibc_heap *heap; + + if (!ptr) + return; + + heap = container_of(ptr, struct nolibc_heap, user_p); + munmap(heap, heap->len); +} + +/* getenv() tries to find the environment variable named <name> in the + * environment array pointed to by global variable "environ" which must be + * declared as a char **, and must be terminated by a NULL (it is recommended + * to set this variable to the "envp" argument of main()). If the requested + * environment variable exists its value is returned otherwise NULL is + * returned. + */ +static __attribute__((unused)) +char *getenv(const char *name) +{ + int idx, i; + + if (environ) { + for (idx = 0; environ[idx]; idx++) { + for (i = 0; name[i] && name[i] == environ[idx][i];) + i++; + if (!name[i] && environ[idx][i] == '=') + return &environ[idx][i+1]; + } + } + return NULL; +} + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + const unsigned long *auxv = _auxv; + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + +static __attribute__((unused)) +void *malloc(size_t len) +{ + struct nolibc_heap *heap; + + /* Always allocate memory with size multiple of 4096. */ + len = sizeof(*heap) + len; + len = (len + 4095UL) & -4096UL; + heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, + -1, 0); + if (__builtin_expect(heap == MAP_FAILED, 0)) + return NULL; + + heap->len = len; + return heap->user_p; +} + +static __attribute__((unused)) +void *calloc(size_t size, size_t nmemb) +{ + size_t x = size * nmemb; + + if (__builtin_expect(size && ((x / size) != nmemb), 0)) { + SET_ERRNO(ENOMEM); + return NULL; + } + + /* + * No need to zero the heap, the MAP_ANONYMOUS in malloc() + * already does it. + */ + return malloc(x); +} + +static __attribute__((unused)) +void *realloc(void *old_ptr, size_t new_size) +{ + struct nolibc_heap *heap; + size_t user_p_len; + void *ret; + + if (!old_ptr) + return malloc(new_size); + + heap = container_of(old_ptr, struct nolibc_heap, user_p); + user_p_len = heap->len - sizeof(*heap); + /* + * Don't realloc() if @user_p_len >= @new_size, this block of + * memory is still enough to handle the @new_size. Just return + * the same pointer. + */ + if (user_p_len >= new_size) + return old_ptr; + + ret = malloc(new_size); + if (__builtin_expect(!ret, 0)) + return NULL; + + memcpy(ret, heap->user_p, heap->len); + munmap(heap, heap->len); + return ret; +} + +/* Converts the unsigned long integer <in> to its hex representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The + * buffer is filled from the first byte, and the number of characters emitted + * (not counting the trailing zero) is returned. The function is constructed + * in a way to optimize the code size and avoid any divide that could add a + * dependency on large external functions. + */ +static __attribute__((unused)) +int utoh_r(unsigned long in, char *buffer) +{ + signed char pos = (~0UL > 0xfffffffful) ? 60 : 28; + int digits = 0; + int dig; + + do { + dig = in >> pos; + in -= (uint64_t)dig << pos; + pos -= 4; + if (dig || digits || pos < 0) { + if (dig > 9) + dig += 'a' - '0' - 10; + buffer[digits++] = '0' + dig; + } + } while (pos >= 0); + + buffer[digits] = 0; + return digits; +} + +/* converts unsigned long <in> to an hex string using the static itoa_buffer + * and returns the pointer to that string. + */ +static __inline__ __attribute__((unused)) +char *utoh(unsigned long in) +{ + utoh_r(in, itoa_buffer); + return itoa_buffer; +} + +/* Converts the unsigned long integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for + * 4294967295 in 32-bit). The buffer is filled from the first byte, and the + * number of characters emitted (not counting the trailing zero) is returned. + * The function is constructed in a way to optimize the code size and avoid + * any divide that could add a dependency on large external functions. + */ +static __attribute__((unused)) +int utoa_r(unsigned long in, char *buffer) +{ + unsigned long lim; + int digits = 0; + int pos = (~0UL > 0xfffffffful) ? 19 : 9; + int dig; + + do { + for (dig = 0, lim = 1; dig < pos; dig++) + lim *= 10; + + if (digits || in >= lim || !pos) { + for (dig = 0; in >= lim; dig++) + in -= lim; + buffer[digits++] = '0' + dig; + } + } while (pos--); + + buffer[digits] = 0; + return digits; +} + +/* Converts the signed long integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for + * -2147483648 in 32-bit). The buffer is filled from the first byte, and the + * number of characters emitted (not counting the trailing zero) is returned. + */ +static __attribute__((unused)) +int itoa_r(long in, char *buffer) +{ + char *ptr = buffer; + int len = 0; + + if (in < 0) { + in = -in; + *(ptr++) = '-'; + len++; + } + len += utoa_r(in, ptr); + return len; +} + +/* for historical compatibility, same as above but returns the pointer to the + * buffer. + */ +static __inline__ __attribute__((unused)) +char *ltoa_r(long in, char *buffer) +{ + itoa_r(in, buffer); + return buffer; +} + +/* converts long integer <in> to a string using the static itoa_buffer and + * returns the pointer to that string. + */ +static __inline__ __attribute__((unused)) +char *itoa(long in) +{ + itoa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* converts long integer <in> to a string using the static itoa_buffer and + * returns the pointer to that string. Same as above, for compatibility. + */ +static __inline__ __attribute__((unused)) +char *ltoa(long in) +{ + itoa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* converts unsigned long integer <in> to a string using the static itoa_buffer + * and returns the pointer to that string. + */ +static __inline__ __attribute__((unused)) +char *utoa(unsigned long in) +{ + utoa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* Converts the unsigned 64-bit integer <in> to its hex representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from + * the first byte, and the number of characters emitted (not counting the + * trailing zero) is returned. The function is constructed in a way to optimize + * the code size and avoid any divide that could add a dependency on large + * external functions. + */ +static __attribute__((unused)) +int u64toh_r(uint64_t in, char *buffer) +{ + signed char pos = 60; + int digits = 0; + int dig; + + do { + if (sizeof(long) >= 8) { + dig = (in >> pos) & 0xF; + } else { + /* 32-bit platforms: avoid a 64-bit shift */ + uint32_t d = (pos >= 32) ? (in >> 32) : in; + dig = (d >> (pos & 31)) & 0xF; + } + if (dig > 9) + dig += 'a' - '0' - 10; + pos -= 4; + if (dig || digits || pos < 0) + buffer[digits++] = '0' + dig; + } while (pos >= 0); + + buffer[digits] = 0; + return digits; +} + +/* converts uint64_t <in> to an hex string using the static itoa_buffer and + * returns the pointer to that string. + */ +static __inline__ __attribute__((unused)) +char *u64toh(uint64_t in) +{ + u64toh_r(in, itoa_buffer); + return itoa_buffer; +} + +/* Converts the unsigned 64-bit integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from + * the first byte, and the number of characters emitted (not counting the + * trailing zero) is returned. The function is constructed in a way to optimize + * the code size and avoid any divide that could add a dependency on large + * external functions. + */ +static __attribute__((unused)) +int u64toa_r(uint64_t in, char *buffer) +{ + unsigned long long lim; + int digits = 0; + int pos = 19; /* start with the highest possible digit */ + int dig; + + do { + for (dig = 0, lim = 1; dig < pos; dig++) + lim *= 10; + + if (digits || in >= lim || !pos) { + for (dig = 0; in >= lim; dig++) + in -= lim; + buffer[digits++] = '0' + dig; + } + } while (pos--); + + buffer[digits] = 0; + return digits; +} + +/* Converts the signed 64-bit integer <in> to its string representation into + * buffer <buffer>, which must be long enough to store the number and the + * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from + * the first byte, and the number of characters emitted (not counting the + * trailing zero) is returned. + */ +static __attribute__((unused)) +int i64toa_r(int64_t in, char *buffer) +{ + char *ptr = buffer; + int len = 0; + + if (in < 0) { + in = -in; + *(ptr++) = '-'; + len++; + } + len += u64toa_r(in, ptr); + return len; +} + +/* converts int64_t <in> to a string using the static itoa_buffer and returns + * the pointer to that string. + */ +static __inline__ __attribute__((unused)) +char *i64toa(int64_t in) +{ + i64toa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* converts uint64_t <in> to a string using the static itoa_buffer and returns + * the pointer to that string. + */ +static __inline__ __attribute__((unused)) +char *u64toa(uint64_t in) +{ + u64toa_r(in, itoa_buffer); + return itoa_buffer; +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h new file mode 100644 index 000000000000..a01c69dd495f --- /dev/null +++ b/tools/include/nolibc/string.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * string function definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_STRING_H +#define _NOLIBC_STRING_H + +#include "std.h" + +static void *malloc(size_t len); + +/* + * As much as possible, please keep functions alphabetically sorted. + */ + +static __attribute__((unused)) +int memcmp(const void *s1, const void *s2, size_t n) +{ + size_t ofs = 0; + int c1 = 0; + + while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) { + ofs++; + } + return c1; +} + +#ifndef NOLIBC_ARCH_HAS_MEMMOVE +/* might be ignored by the compiler without -ffreestanding, then found as + * missing. + */ +__attribute__((weak,unused,section(".text.nolibc_memmove"))) +void *memmove(void *dst, const void *src, size_t len) +{ + size_t dir, pos; + + pos = len; + dir = -1; + + if (dst < src) { + pos = -1; + dir = 1; + } + + while (len) { + pos += dir; + ((char *)dst)[pos] = ((const char *)src)[pos]; + len--; + } + return dst; +} +#endif /* #ifndef NOLIBC_ARCH_HAS_MEMMOVE */ + +#ifndef NOLIBC_ARCH_HAS_MEMCPY +/* must be exported, as it's used by libgcc on ARM */ +__attribute__((weak,unused,section(".text.nolibc_memcpy"))) +void *memcpy(void *dst, const void *src, size_t len) +{ + size_t pos = 0; + + while (pos < len) { + ((char *)dst)[pos] = ((const char *)src)[pos]; + pos++; + } + return dst; +} +#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCPY */ + +#ifndef NOLIBC_ARCH_HAS_MEMSET +/* might be ignored by the compiler without -ffreestanding, then found as + * missing. + */ +__attribute__((weak,unused,section(".text.nolibc_memset"))) +void *memset(void *dst, int b, size_t len) +{ + char *p = dst; + + while (len--) { + /* prevent gcc from recognizing memset() here */ + __asm__ volatile(""); + *(p++) = b; + } + return dst; +} +#endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */ + +static __attribute__((unused)) +char *strchr(const char *s, int c) +{ + while (*s) { + if (*s == (char)c) + return (char *)s; + s++; + } + return NULL; +} + +static __attribute__((unused)) +int strcmp(const char *a, const char *b) +{ + unsigned int c; + int diff; + + while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c) + ; + return diff; +} + +static __attribute__((unused)) +char *strcpy(char *dst, const char *src) +{ + char *ret = dst; + + while ((*dst++ = *src++)); + return ret; +} + +/* this function is only used with arguments that are not constants or when + * it's not known because optimizations are disabled. Note that gcc 12 + * recognizes an strlen() pattern and replaces it with a jump to strlen(), + * thus itself, hence the asm() statement below that's meant to disable this + * confusing practice. + */ +static __attribute__((unused)) +size_t strlen(const char *str) +{ + size_t len; + + for (len = 0; str[len]; len++) + __asm__(""); + return len; +} + +/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and + * the two branches, then will rely on an external definition of strlen(). + */ +#if defined(__OPTIMIZE__) +#define nolibc_strlen(x) strlen(x) +#define strlen(str) ({ \ + __builtin_constant_p((str)) ? \ + __builtin_strlen((str)) : \ + nolibc_strlen((str)); \ +}) +#endif + +static __attribute__((unused)) +size_t strnlen(const char *str, size_t maxlen) +{ + size_t len; + + for (len = 0; (len < maxlen) && str[len]; len++); + return len; +} + +static __attribute__((unused)) +char *strdup(const char *str) +{ + size_t len; + char *ret; + + len = strlen(str); + ret = malloc(len + 1); + if (__builtin_expect(ret != NULL, 1)) + memcpy(ret, str, len + 1); + + return ret; +} + +static __attribute__((unused)) +char *strndup(const char *str, size_t maxlen) +{ + size_t len; + char *ret; + + len = strnlen(str, maxlen); + ret = malloc(len + 1); + if (__builtin_expect(ret != NULL, 1)) { + memcpy(ret, str, len); + ret[len] = '\0'; + } + + return ret; +} + +static __attribute__((unused)) +size_t strlcat(char *dst, const char *src, size_t size) +{ + size_t len; + char c; + + for (len = 0; dst[len]; len++) + ; + + for (;;) { + c = *src; + if (len < size) + dst[len] = c; + if (!c) + break; + len++; + src++; + } + + return len; +} + +static __attribute__((unused)) +size_t strlcpy(char *dst, const char *src, size_t size) +{ + size_t len; + char c; + + for (len = 0;;) { + c = src[len]; + if (len < size) + dst[len] = c; + if (!c) + break; + len++; + } + return len; +} + +static __attribute__((unused)) +char *strncat(char *dst, const char *src, size_t size) +{ + char *orig = dst; + + while (*dst) + dst++; + + while (size && (*dst = *src)) { + src++; + dst++; + size--; + } + + *dst = 0; + return orig; +} + +static __attribute__((unused)) +int strncmp(const char *a, const char *b, size_t size) +{ + unsigned int c; + int diff = 0; + + while (size-- && + !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c) + ; + + return diff; +} + +static __attribute__((unused)) +char *strncpy(char *dst, const char *src, size_t size) +{ + size_t len; + + for (len = 0; len < size; len++) + if ((dst[len] = *src)) + src++; + return dst; +} + +static __attribute__((unused)) +char *strrchr(const char *s, int c) +{ + const char *ret = NULL; + + while (*s) { + if (*s == (char)c) + ret = s; + s++; + } + return (char *)ret; +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h new file mode 100644 index 000000000000..dda9dffd1d74 --- /dev/null +++ b/tools/include/nolibc/sys.h @@ -0,0 +1,1237 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Syscall definitions for NOLIBC (those in man(2)) + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_SYS_H +#define _NOLIBC_SYS_H + +#include "std.h" + +/* system includes */ +#include <asm/unistd.h> +#include <asm/signal.h> /* for SIGCHLD */ +#include <asm/ioctls.h> +#include <asm/mman.h> +#include <linux/fs.h> +#include <linux/loop.h> +#include <linux/time.h> +#include <linux/auxvec.h> +#include <linux/fcntl.h> /* for O_* and AT_* */ +#include <linux/stat.h> /* for statx() */ +#include <linux/prctl.h> +#include <linux/resource.h> + +#include "arch.h" +#include "errno.h" +#include "stdarg.h" +#include "types.h" + + +/* Syscall return helper: takes the syscall value in argument and checks for an + * error in it. This may only be used with signed returns (int or long), but + * not with pointers. An error is any value < 0. When an error is encountered, + * -ret is set into errno and -1 is returned. Otherwise the returned value is + * passed as-is with its type preserved. + */ + +#define __sysret(arg) \ +({ \ + __typeof__(arg) __sysret_arg = (arg); \ + (__sysret_arg < 0) /* error ? */ \ + ? (({ SET_ERRNO(-__sysret_arg); }), -1) /* ret -1 with errno = -arg */ \ + : __sysret_arg; /* return original value */ \ +}) + +/* Syscall ENOSYS helper: Avoids unused-parameter warnings and provides a + * debugging hook. + */ + +static __inline__ int __nolibc_enosys(const char *syscall, ...) +{ + (void)syscall; + return -ENOSYS; +} + + +/* Functions in this file only describe syscalls. They're declared static so + * that the compiler usually decides to inline them while still being allowed + * to pass a pointer to one of their instances. Each syscall exists in two + * versions: + * - the "internal" ones, which matches the raw syscall interface at the + * kernel level, which may sometimes slightly differ from the documented + * libc-level ones. For example most of them return either a valid value + * or -errno. All of these are prefixed with "sys_". They may be called + * by non-portable applications if desired. + * + * - the "exported" ones, whose interface must closely match the one + * documented in man(2), that applications are supposed to expect. These + * ones rely on the internal ones, and set errno. + * + * Each syscall will be defined with the two functions, sorted in alphabetical + * order applied to the exported names. + * + * In case of doubt about the relevance of a function here, only those which + * set errno should be defined here. Wrappers like those appearing in man(3) + * should not be placed here. + */ + + +/* + * int brk(void *addr); + * void *sbrk(intptr_t inc) + */ + +static __attribute__((unused)) +void *sys_brk(void *addr) +{ + return (void *)my_syscall1(__NR_brk, addr); +} + +static __attribute__((unused)) +int brk(void *addr) +{ + void *ret = sys_brk(addr); + + if (!ret) { + SET_ERRNO(ENOMEM); + return -1; + } + return 0; +} + +static __attribute__((unused)) +void *sbrk(intptr_t inc) +{ + /* first call to find current end */ + void *ret = sys_brk(0); + + if (ret && sys_brk(ret + inc) == ret + inc) + return ret + inc; + + SET_ERRNO(ENOMEM); + return (void *)-1; +} + + +/* + * int chdir(const char *path); + */ + +static __attribute__((unused)) +int sys_chdir(const char *path) +{ + return my_syscall1(__NR_chdir, path); +} + +static __attribute__((unused)) +int chdir(const char *path) +{ + return __sysret(sys_chdir(path)); +} + + +/* + * int chmod(const char *path, mode_t mode); + */ + +static __attribute__((unused)) +int sys_chmod(const char *path, mode_t mode) +{ +#ifdef __NR_fchmodat + return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); +#elif defined(__NR_chmod) + return my_syscall2(__NR_chmod, path, mode); +#else + return __nolibc_enosys(__func__, path, mode); +#endif +} + +static __attribute__((unused)) +int chmod(const char *path, mode_t mode) +{ + return __sysret(sys_chmod(path, mode)); +} + + +/* + * int chown(const char *path, uid_t owner, gid_t group); + */ + +static __attribute__((unused)) +int sys_chown(const char *path, uid_t owner, gid_t group) +{ +#ifdef __NR_fchownat + return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); +#elif defined(__NR_chown) + return my_syscall3(__NR_chown, path, owner, group); +#else + return __nolibc_enosys(__func__, path, owner, group); +#endif +} + +static __attribute__((unused)) +int chown(const char *path, uid_t owner, gid_t group) +{ + return __sysret(sys_chown(path, owner, group)); +} + + +/* + * int chroot(const char *path); + */ + +static __attribute__((unused)) +int sys_chroot(const char *path) +{ + return my_syscall1(__NR_chroot, path); +} + +static __attribute__((unused)) +int chroot(const char *path) +{ + return __sysret(sys_chroot(path)); +} + + +/* + * int close(int fd); + */ + +static __attribute__((unused)) +int sys_close(int fd) +{ + return my_syscall1(__NR_close, fd); +} + +static __attribute__((unused)) +int close(int fd) +{ + return __sysret(sys_close(fd)); +} + + +/* + * int dup(int fd); + */ + +static __attribute__((unused)) +int sys_dup(int fd) +{ + return my_syscall1(__NR_dup, fd); +} + +static __attribute__((unused)) +int dup(int fd) +{ + return __sysret(sys_dup(fd)); +} + + +/* + * int dup2(int old, int new); + */ + +static __attribute__((unused)) +int sys_dup2(int old, int new) +{ +#ifdef __NR_dup3 + return my_syscall3(__NR_dup3, old, new, 0); +#elif defined(__NR_dup2) + return my_syscall2(__NR_dup2, old, new); +#else + return __nolibc_enosys(__func__, old, new); +#endif +} + +static __attribute__((unused)) +int dup2(int old, int new) +{ + return __sysret(sys_dup2(old, new)); +} + + +/* + * int dup3(int old, int new, int flags); + */ + +#ifdef __NR_dup3 +static __attribute__((unused)) +int sys_dup3(int old, int new, int flags) +{ + return my_syscall3(__NR_dup3, old, new, flags); +} + +static __attribute__((unused)) +int dup3(int old, int new, int flags) +{ + return __sysret(sys_dup3(old, new, flags)); +} +#endif + + +/* + * int execve(const char *filename, char *const argv[], char *const envp[]); + */ + +static __attribute__((unused)) +int sys_execve(const char *filename, char *const argv[], char *const envp[]) +{ + return my_syscall3(__NR_execve, filename, argv, envp); +} + +static __attribute__((unused)) +int execve(const char *filename, char *const argv[], char *const envp[]) +{ + return __sysret(sys_execve(filename, argv, envp)); +} + + +/* + * void exit(int status); + */ + +static __attribute__((noreturn,unused)) +void sys_exit(int status) +{ + my_syscall1(__NR_exit, status & 255); + while(1); /* shut the "noreturn" warnings. */ +} + +static __attribute__((noreturn,unused)) +void exit(int status) +{ + sys_exit(status); +} + + +/* + * pid_t fork(void); + */ + +#ifndef sys_fork +static __attribute__((unused)) +pid_t sys_fork(void) +{ +#ifdef __NR_clone + /* note: some archs only have clone() and not fork(). Different archs + * have a different API, but most archs have the flags on first arg and + * will not use the rest with no other flag. + */ + return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); +#elif defined(__NR_fork) + return my_syscall0(__NR_fork); +#else + return __nolibc_enosys(__func__); +#endif +} +#endif + +static __attribute__((unused)) +pid_t fork(void) +{ + return __sysret(sys_fork()); +} + + +/* + * int fsync(int fd); + */ + +static __attribute__((unused)) +int sys_fsync(int fd) +{ + return my_syscall1(__NR_fsync, fd); +} + +static __attribute__((unused)) +int fsync(int fd) +{ + return __sysret(sys_fsync(fd)); +} + + +/* + * int getdents64(int fd, struct linux_dirent64 *dirp, int count); + */ + +static __attribute__((unused)) +int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return my_syscall3(__NR_getdents64, fd, dirp, count); +} + +static __attribute__((unused)) +int getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return __sysret(sys_getdents64(fd, dirp, count)); +} + + +/* + * uid_t geteuid(void); + */ + +static __attribute__((unused)) +uid_t sys_geteuid(void) +{ +#ifdef __NR_geteuid32 + return my_syscall0(__NR_geteuid32); +#else + return my_syscall0(__NR_geteuid); +#endif +} + +static __attribute__((unused)) +uid_t geteuid(void) +{ + return sys_geteuid(); +} + + +/* + * pid_t getpgid(pid_t pid); + */ + +static __attribute__((unused)) +pid_t sys_getpgid(pid_t pid) +{ + return my_syscall1(__NR_getpgid, pid); +} + +static __attribute__((unused)) +pid_t getpgid(pid_t pid) +{ + return __sysret(sys_getpgid(pid)); +} + + +/* + * pid_t getpgrp(void); + */ + +static __attribute__((unused)) +pid_t sys_getpgrp(void) +{ + return sys_getpgid(0); +} + +static __attribute__((unused)) +pid_t getpgrp(void) +{ + return sys_getpgrp(); +} + + +/* + * pid_t getpid(void); + */ + +static __attribute__((unused)) +pid_t sys_getpid(void) +{ + return my_syscall0(__NR_getpid); +} + +static __attribute__((unused)) +pid_t getpid(void) +{ + return sys_getpid(); +} + + +/* + * pid_t getppid(void); + */ + +static __attribute__((unused)) +pid_t sys_getppid(void) +{ + return my_syscall0(__NR_getppid); +} + +static __attribute__((unused)) +pid_t getppid(void) +{ + return sys_getppid(); +} + + +/* + * pid_t gettid(void); + */ + +static __attribute__((unused)) +pid_t sys_gettid(void) +{ + return my_syscall0(__NR_gettid); +} + +static __attribute__((unused)) +pid_t gettid(void) +{ + return sys_gettid(); +} + +static unsigned long getauxval(unsigned long key); + +/* + * int getpagesize(void); + */ + +static __attribute__((unused)) +int getpagesize(void) +{ + return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT); +} + + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ +#ifdef __NR_gettimeofday + return my_syscall2(__NR_gettimeofday, tv, tz); +#else + return __nolibc_enosys(__func__, tv, tz); +#endif +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return __sysret(sys_gettimeofday(tv, tz)); +} + + +/* + * uid_t getuid(void); + */ + +static __attribute__((unused)) +uid_t sys_getuid(void) +{ +#ifdef __NR_getuid32 + return my_syscall0(__NR_getuid32); +#else + return my_syscall0(__NR_getuid); +#endif +} + +static __attribute__((unused)) +uid_t getuid(void) +{ + return sys_getuid(); +} + + +/* + * int ioctl(int fd, unsigned long req, void *value); + */ + +static __attribute__((unused)) +int sys_ioctl(int fd, unsigned long req, void *value) +{ + return my_syscall3(__NR_ioctl, fd, req, value); +} + +static __attribute__((unused)) +int ioctl(int fd, unsigned long req, void *value) +{ + return __sysret(sys_ioctl(fd, req, value)); +} + +/* + * int kill(pid_t pid, int signal); + */ + +static __attribute__((unused)) +int sys_kill(pid_t pid, int signal) +{ + return my_syscall2(__NR_kill, pid, signal); +} + +static __attribute__((unused)) +int kill(pid_t pid, int signal) +{ + return __sysret(sys_kill(pid, signal)); +} + + +/* + * int link(const char *old, const char *new); + */ + +static __attribute__((unused)) +int sys_link(const char *old, const char *new) +{ +#ifdef __NR_linkat + return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); +#elif defined(__NR_link) + return my_syscall2(__NR_link, old, new); +#else + return __nolibc_enosys(__func__, old, new); +#endif +} + +static __attribute__((unused)) +int link(const char *old, const char *new) +{ + return __sysret(sys_link(old, new)); +} + + +/* + * off_t lseek(int fd, off_t offset, int whence); + */ + +static __attribute__((unused)) +off_t sys_lseek(int fd, off_t offset, int whence) +{ +#ifdef __NR_lseek + return my_syscall3(__NR_lseek, fd, offset, whence); +#else + return __nolibc_enosys(__func__, fd, offset, whence); +#endif +} + +static __attribute__((unused)) +off_t lseek(int fd, off_t offset, int whence) +{ + return __sysret(sys_lseek(fd, offset, whence)); +} + + +/* + * int mkdir(const char *path, mode_t mode); + */ + +static __attribute__((unused)) +int sys_mkdir(const char *path, mode_t mode) +{ +#ifdef __NR_mkdirat + return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); +#elif defined(__NR_mkdir) + return my_syscall2(__NR_mkdir, path, mode); +#else + return __nolibc_enosys(__func__, path, mode); +#endif +} + +static __attribute__((unused)) +int mkdir(const char *path, mode_t mode) +{ + return __sysret(sys_mkdir(path, mode)); +} + +/* + * int rmdir(const char *path); + */ + +static __attribute__((unused)) +int sys_rmdir(const char *path) +{ +#ifdef __NR_rmdir + return my_syscall1(__NR_rmdir, path); +#elif defined(__NR_unlinkat) + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); +#else + return __nolibc_enosys(__func__, path); +#endif +} + +static __attribute__((unused)) +int rmdir(const char *path) +{ + return __sysret(sys_rmdir(path)); +} + + +/* + * int mknod(const char *path, mode_t mode, dev_t dev); + */ + +static __attribute__((unused)) +long sys_mknod(const char *path, mode_t mode, dev_t dev) +{ +#ifdef __NR_mknodat + return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); +#elif defined(__NR_mknod) + return my_syscall3(__NR_mknod, path, mode, dev); +#else + return __nolibc_enosys(__func__, path, mode, dev); +#endif +} + +static __attribute__((unused)) +int mknod(const char *path, mode_t mode, dev_t dev) +{ + return __sysret(sys_mknod(path, mode, dev)); +} + +#ifndef sys_mmap +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + int n; + +#if defined(__NR_mmap2) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); +} +#endif + +/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() + * which returns -1 upon error and still satisfy user land that checks for + * MAP_FAILED. + */ + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int sys_munmap(void *addr, size_t length) +{ + return my_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + return __sysret(sys_munmap(addr, length)); +} + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + return __sysret(sys_mount(src, tgt, fst, flags, data)); +} + + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ +#ifdef __NR_openat + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +#elif defined(__NR_open) + return my_syscall3(__NR_open, path, flags, mode); +#else + return __nolibc_enosys(__func__, path, flags, mode); +#endif +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, int); + va_end(args); + } + + return __sysret(sys_open(path, flags, mode)); +} + + +/* + * int pipe2(int pipefd[2], int flags); + * int pipe(int pipefd[2]); + */ + +static __attribute__((unused)) +int sys_pipe2(int pipefd[2], int flags) +{ + return my_syscall2(__NR_pipe2, pipefd, flags); +} + +static __attribute__((unused)) +int pipe2(int pipefd[2], int flags) +{ + return __sysret(sys_pipe2(pipefd, flags)); +} + +static __attribute__((unused)) +int pipe(int pipefd[2]) +{ + return pipe2(pipefd, 0); +} + + +/* + * int prctl(int option, unsigned long arg2, unsigned long arg3, + * unsigned long arg4, unsigned long arg5); + */ + +static __attribute__((unused)) +int sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); +} + +static __attribute__((unused)) +int prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); +} + + +/* + * int pivot_root(const char *new, const char *old); + */ + +static __attribute__((unused)) +int sys_pivot_root(const char *new, const char *old) +{ + return my_syscall2(__NR_pivot_root, new, old); +} + +static __attribute__((unused)) +int pivot_root(const char *new, const char *old) +{ + return __sysret(sys_pivot_root(new, old)); +} + + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_poll) + return my_syscall3(__NR_poll, fds, nfds, timeout); +#else + return __nolibc_enosys(__func__, fds, nfds, timeout); +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + return __sysret(sys_poll(fds, nfds, timeout)); +} + + +/* + * ssize_t read(int fd, void *buf, size_t count); + */ + +static __attribute__((unused)) +ssize_t sys_read(int fd, void *buf, size_t count) +{ + return my_syscall3(__NR_read, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t read(int fd, void *buf, size_t count) +{ + return __sysret(sys_read(fd, buf, count)); +} + + +/* + * int reboot(int cmd); + * <cmd> is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); +} + + +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); +} + + +/* + * int sched_yield(void); + */ + +static __attribute__((unused)) +int sys_sched_yield(void) +{ + return my_syscall0(__NR_sched_yield); +} + +static __attribute__((unused)) +int sched_yield(void) +{ + return __sysret(sys_sched_yield()); +} + + +/* + * int select(int nfds, fd_set *read_fds, fd_set *write_fds, + * fd_set *except_fds, struct timeval *timeout); + */ + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__NR__newselect) + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#elif defined(__NR_select) + return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout); +#elif defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#else + return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); +#endif +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + return __sysret(sys_select(nfds, rfds, wfds, efds, timeout)); +} + + +/* + * int setpgid(pid_t pid, pid_t pgid); + */ + +static __attribute__((unused)) +int sys_setpgid(pid_t pid, pid_t pgid) +{ + return my_syscall2(__NR_setpgid, pid, pgid); +} + +static __attribute__((unused)) +int setpgid(pid_t pid, pid_t pgid) +{ + return __sysret(sys_setpgid(pid, pgid)); +} + + +/* + * pid_t setsid(void); + */ + +static __attribute__((unused)) +pid_t sys_setsid(void) +{ + return my_syscall0(__NR_setsid); +} + +static __attribute__((unused)) +pid_t setsid(void) +{ + return __sysret(sys_setsid()); +} + +/* + * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); + * int stat(const char *path, struct stat *buf); + */ + +static __attribute__((unused)) +int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ +#ifdef __NR_statx + return my_syscall5(__NR_statx, fd, path, flags, mask, buf); +#else + return __nolibc_enosys(__func__, fd, path, flags, mask, buf); +#endif +} + +static __attribute__((unused)) +int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ + return __sysret(sys_statx(fd, path, flags, mask, buf)); +} + + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + struct statx statx; + long ret; + + ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + if (ret == -1) + return ret; + + buf->st_dev = ((statx.stx_dev_minor & 0xff) + | (statx.stx_dev_major << 8) + | ((statx.stx_dev_minor & ~0xff) << 12)); + buf->st_ino = statx.stx_ino; + buf->st_mode = statx.stx_mode; + buf->st_nlink = statx.stx_nlink; + buf->st_uid = statx.stx_uid; + buf->st_gid = statx.stx_gid; + buf->st_rdev = ((statx.stx_rdev_minor & 0xff) + | (statx.stx_rdev_major << 8) + | ((statx.stx_rdev_minor & ~0xff) << 12)); + buf->st_size = statx.stx_size; + buf->st_blksize = statx.stx_blksize; + buf->st_blocks = statx.stx_blocks; + buf->st_atim.tv_sec = statx.stx_atime.tv_sec; + buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; + buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; + buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; + buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; + buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; + + return 0; +} + + +/* + * int symlink(const char *old, const char *new); + */ + +static __attribute__((unused)) +int sys_symlink(const char *old, const char *new) +{ +#ifdef __NR_symlinkat + return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); +#elif defined(__NR_symlink) + return my_syscall2(__NR_symlink, old, new); +#else + return __nolibc_enosys(__func__, old, new); +#endif +} + +static __attribute__((unused)) +int symlink(const char *old, const char *new) +{ + return __sysret(sys_symlink(old, new)); +} + + +/* + * mode_t umask(mode_t mode); + */ + +static __attribute__((unused)) +mode_t sys_umask(mode_t mode) +{ + return my_syscall1(__NR_umask, mode); +} + +static __attribute__((unused)) +mode_t umask(mode_t mode) +{ + return sys_umask(mode); +} + + +/* + * int umount2(const char *path, int flags); + */ + +static __attribute__((unused)) +int sys_umount2(const char *path, int flags) +{ + return my_syscall2(__NR_umount2, path, flags); +} + +static __attribute__((unused)) +int umount2(const char *path, int flags) +{ + return __sysret(sys_umount2(path, flags)); +} + + +/* + * int unlink(const char *path); + */ + +static __attribute__((unused)) +int sys_unlink(const char *path) +{ +#ifdef __NR_unlinkat + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); +#elif defined(__NR_unlink) + return my_syscall1(__NR_unlink, path); +#else + return __nolibc_enosys(__func__, path); +#endif +} + +static __attribute__((unused)) +int unlink(const char *path) +{ + return __sysret(sys_unlink(path)); +} + + +/* + * pid_t wait(int *status); + * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); + * pid_t waitpid(pid_t pid, int *status, int options); + */ + +static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ +#ifdef __NR_wait4 + return my_syscall4(__NR_wait4, pid, status, options, rusage); +#else + return __nolibc_enosys(__func__, pid, status, options, rusage); +#endif +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + return __sysret(sys_wait4(-1, status, 0, NULL)); +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return __sysret(sys_wait4(pid, status, options, rusage)); +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + return __sysret(sys_wait4(pid, status, options, NULL)); +} + + +/* + * ssize_t write(int fd, const void *buf, size_t count); + */ + +static __attribute__((unused)) +ssize_t sys_write(int fd, const void *buf, size_t count) +{ + return my_syscall3(__NR_write, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t write(int fd, const void *buf, size_t count) +{ + return __sysret(sys_write(fd, buf, count)); +} + + +/* + * int memfd_create(const char *name, unsigned int flags); + */ + +static __attribute__((unused)) +int sys_memfd_create(const char *name, unsigned int flags) +{ + return my_syscall2(__NR_memfd_create, name, flags); +} + +static __attribute__((unused)) +int memfd_create(const char *name, unsigned int flags) +{ + return __sysret(sys_memfd_create(name, flags)); +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h new file mode 100644 index 000000000000..84655361b9ad --- /dev/null +++ b/tools/include/nolibc/time.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time function definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_TIME_H +#define _NOLIBC_TIME_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" + +static __attribute__((unused)) +time_t time(time_t *tptr) +{ + struct timeval tv; + + /* note, cannot fail here */ + sys_gettimeofday(&tv, NULL); + + if (tptr) + *tptr = tv.tv_sec; + return tv.tv_sec; +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h new file mode 100644 index 000000000000..b26a5d0c417c --- /dev/null +++ b/tools/include/nolibc/types.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Special types used by various syscalls for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_TYPES_H +#define _NOLIBC_TYPES_H + +#include "std.h" +#include <linux/mman.h> +#include <linux/reboot.h> /* for LINUX_REBOOT_* */ +#include <linux/stat.h> +#include <linux/time.h> +#include <linux/wait.h> +#include <linux/resource.h> + + +/* Only the generic macros and types may be defined here. The arch-specific + * ones such as the O_RDONLY and related macros used by fcntl() and open() + * must not be defined here. + */ + +/* stat flags (WARNING, octal here). We need to check for an existing + * definition because linux/stat.h may omit to define those if it finds + * that any glibc header was already included. + */ +#if !defined(S_IFMT) +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFBLK 0060000 +#define S_IFREG 0100000 +#define S_IFIFO 0010000 +#define S_IFLNK 0120000 +#define S_IFSOCK 0140000 +#define S_IFMT 0170000 + +#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) + +#define S_IRWXU 00700 +#define S_IRUSR 00400 +#define S_IWUSR 00200 +#define S_IXUSR 00100 + +#define S_IRWXG 00070 +#define S_IRGRP 00040 +#define S_IWGRP 00020 +#define S_IXGRP 00010 + +#define S_IRWXO 00007 +#define S_IROTH 00004 +#define S_IWOTH 00002 +#define S_IXOTH 00001 +#endif + +/* dirent types */ +#define DT_UNKNOWN 0x0 +#define DT_FIFO 0x1 +#define DT_CHR 0x2 +#define DT_DIR 0x4 +#define DT_BLK 0x6 +#define DT_REG 0x8 +#define DT_LNK 0xa +#define DT_SOCK 0xc + +/* commonly an fd_set represents 256 FDs */ +#ifndef FD_SETSIZE +#define FD_SETSIZE 256 +#endif + +/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different + * values. + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifndef MAXPATHLEN +#define MAXPATHLEN (PATH_MAX) +#endif + +/* flags for mmap */ +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +/* whence values for lseek() */ +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +/* flags for reboot */ +#define RB_AUTOBOOT LINUX_REBOOT_CMD_RESTART +#define RB_HALT_SYSTEM LINUX_REBOOT_CMD_HALT +#define RB_ENABLE_CAD LINUX_REBOOT_CMD_CAD_ON +#define RB_DISABLE_CAD LINUX_REBOOT_CMD_CAD_OFF +#define RB_POWER_OFF LINUX_REBOOT_CMD_POWER_OFF +#define RB_SW_SUSPEND LINUX_REBOOT_CMD_SW_SUSPEND +#define RB_KEXEC LINUX_REBOOT_CMD_KEXEC + +/* Macros used on waitpid()'s return status */ +#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +#define WIFEXITED(status) (((status) & 0x7f) == 0) +#define WTERMSIG(status) ((status) & 0x7f) +#define WIFSIGNALED(status) ((status) - 1 < 0xff) + +/* standard exit() codes */ +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 + +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + +/* for select() */ +typedef struct { + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; +} fd_set; + +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FX_SETBITMASK)); \ + } while (0) + +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ + } while (0) + +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SET_BITMASK)); \ + __r; \ + }) + +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ + } while (0) + +/* for poll() */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +struct pollfd { + int fd; + short int events; + short int revents; +}; + +/* for getdents64() */ +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +/* The format of the struct as returned by the libc to the application, which + * significantly differs from the format returned by the stat() syscall flavours. + */ +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + union { time_t st_atime; struct timespec st_atim; }; /* time of last access */ + union { time_t st_mtime; struct timespec st_mtim; }; /* time of last modification */ + union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ +}; + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) +#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) +#define minor(dev) ((unsigned int)(((dev) & 0xff)) + +#ifndef offsetof +#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) +#endif + +#ifndef container_of +#define container_of(PTR, TYPE, FIELD) ({ \ + __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \ + (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \ +}) +#endif + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h new file mode 100644 index 000000000000..e38f3660c051 --- /dev/null +++ b/tools/include/nolibc/unistd.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * unistd function definitions for NOLIBC + * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + */ + +#ifndef _NOLIBC_UNISTD_H +#define _NOLIBC_UNISTD_H + +#include "std.h" +#include "arch.h" +#include "types.h" +#include "sys.h" + + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + + +static __attribute__((unused)) +int msleep(unsigned int msecs) +{ + struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return (my_timeval.tv_sec * 1000) + + (my_timeval.tv_usec / 1000) + + !!(my_timeval.tv_usec % 1000); + else + return 0; +} + +static __attribute__((unused)) +unsigned int sleep(unsigned int seconds) +{ + struct timeval my_timeval = { seconds, 0 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return my_timeval.tv_sec + !!my_timeval.tv_usec; + else + return 0; +} + +static __attribute__((unused)) +int usleep(unsigned int usecs) +{ + struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 }; + + return sys_select(0, 0, 0, 0, &my_timeval); +} + +static __attribute__((unused)) +int tcsetpgrp(int fd, pid_t pid) +{ + return ioctl(fd, TIOCSPGRP, &pid); +} + +#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N +#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) +#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) +#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) +#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/include/perf/arm_pmuv3.h b/tools/include/perf/arm_pmuv3.h new file mode 100644 index 000000000000..1e397d55384e --- /dev/null +++ b/tools/include/perf/arm_pmuv3.h @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __PERF_ARM_PMUV3_H +#define __PERF_ARM_PMUV3_H + +#include <assert.h> +#include <asm/bug.h> + +#define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) + +/* + * Common architectural and microarchitectural event numbers. + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B +#define ARMV8_PMUV3_PERFCTR_STALL 0x003C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F + +/* Statistical profiling extension microarchitectural events */ +#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 +#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 +#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 +#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 + +/* AMUv1 architecture events */ +#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 +#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 + +/* long-latency read miss events */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B + +/* Trace buffer events */ +#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C +#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E + +/* Trace unit events */ +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B + +/* additional latency from alignment events */ +#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 +#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 +#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 + +/* Armv8.5 Memory Tagging Extension events */ +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 + +/* ARMv8 recommended implementation defined event types */ +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ +#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */ +/* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \ + ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \ + ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ + ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP) + +/* + * PMOVSR: counters overflow flag status reg + */ +#define ARMV8_PMU_OVSR_P GENMASK(30, 0) +#define ARMV8_PMU_OVSR_C BIT(31) +/* Mask for writable bits is both P and C fields */ +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32) +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61) + +/* + * Event filters for PMUv3 + */ +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29) +#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26) + +/* + * PMUSERENR: user enable reg + */ +#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ +#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ +#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* Mask for writable bits */ +#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ + ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) + +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS GENMASK(7, 0) +#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8) +#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16) +#define ARMV8_PMU_THWIDTH GENMASK(23, 20) + +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: \ + WARN(1, "Invalid PMEV* index\n"); \ + assert(0); \ + } \ + } while (0) + +#endif diff --git a/tools/include/tools/dis-asm-compat.h b/tools/include/tools/dis-asm-compat.h new file mode 100644 index 000000000000..70f331e23ed3 --- /dev/null +++ b/tools/include/tools/dis-asm-compat.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +#ifndef _TOOLS_DIS_ASM_COMPAT_H +#define _TOOLS_DIS_ASM_COMPAT_H + +#include <stdio.h> +#include <dis-asm.h> + +/* define types for older binutils version, to centralize ifdef'ery a bit */ +#ifndef DISASM_INIT_STYLED +enum disassembler_style {DISASSEMBLER_STYLE_NOT_EMPTY}; +typedef int (*fprintf_styled_ftype) (void *, enum disassembler_style, const char*, ...); +#endif + +/* + * Trivial fprintf wrapper to be used as the fprintf_styled_func argument to + * init_disassemble_info_compat() when normal fprintf suffices. + */ +static inline int fprintf_styled(void *out, + enum disassembler_style style, + const char *fmt, ...) +{ + va_list args; + int r; + + (void)style; + + va_start(args, fmt); + r = vfprintf(out, fmt, args); + va_end(args); + + return r; +} + +/* + * Wrapper for init_disassemble_info() that hides version + * differences. Depending on binutils version and architecture either + * fprintf_func or fprintf_styled_func will be called. + */ +static inline void init_disassemble_info_compat(struct disassemble_info *info, + void *stream, + fprintf_ftype unstyled_func, + fprintf_styled_ftype styled_func) +{ +#ifdef DISASM_INIT_STYLED + init_disassemble_info(info, stream, + unstyled_func, + styled_func); +#else + (void)styled_func; + init_disassemble_info(info, stream, + unstyled_func); +#endif +} + +#endif /* _TOOLS_DIS_ASM_COMPAT_H */ diff --git a/tools/include/uapi/asm-generic/bitsperlong.h b/tools/include/uapi/asm-generic/bitsperlong.h index 23e6c416b85f..352cb81947b8 100644 --- a/tools/include/uapi/asm-generic/bitsperlong.h +++ b/tools/include/uapi/asm-generic/bitsperlong.h @@ -1,6 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI__ASM_GENERIC_BITS_PER_LONG #define _UAPI__ASM_GENERIC_BITS_PER_LONG +#ifndef __BITS_PER_LONG +/* + * In order to keep safe and avoid regression, only unify uapi + * bitsperlong.h for some archs which are using newer toolchains + * that have the definitions of __CHAR_BIT__ and __SIZEOF_LONG__. + * See the following link for more info: + * https://lore.kernel.org/linux-arch/b9624545-2c80-49a1-ac3c-39264a591f7b@app.fastmail.com/ + */ +#if defined(__CHAR_BIT__) && defined(__SIZEOF_LONG__) +#define __BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__) +#else /* * There seems to be no way of detecting this automatically from user * space, so 64 bit architectures should override this in their @@ -8,8 +20,8 @@ * both 32 and 64 bit user space must not rely on CONFIG_64BIT * to decide it, but rather check a compiler provided macro. */ -#ifndef __BITS_PER_LONG #define __BITS_PER_LONG 32 #endif +#endif #endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */ diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h index ac190958c981..1c7a0f6632c0 100644 --- a/tools/include/uapi/asm-generic/fcntl.h +++ b/tools/include/uapi/asm-generic/fcntl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_GENERIC_FCNTL_H #define _ASM_GENERIC_FCNTL_H @@ -90,7 +91,6 @@ /* a horrid kludge trying to make sure that this will fail on old kernels */ #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) -#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) #ifndef O_NDELAY #define O_NDELAY O_NONBLOCK @@ -115,13 +115,13 @@ #define F_GETSIG 11 /* for sockets. */ #endif -#ifndef CONFIG_64BIT +#if __BITS_PER_LONG == 32 || defined(__KERNEL__) #ifndef F_GETLK64 #define F_GETLK64 12 /* using 'struct flock64' */ #define F_SETLK64 13 #define F_SETLKW64 14 #endif -#endif +#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */ #ifndef F_SETOWN_EX #define F_SETOWN_EX 15 @@ -142,7 +142,7 @@ * record locks, but are "owned" by the open file description, not the * process. This means that they are inherited across fork() like BSD (flock) * locks, and they are only released automatically when the last reference to - * the the open file against which they were acquired is put. + * the open file against which they were acquired is put. */ #define F_OFD_GETLK 36 #define F_OFD_SETLK 37 @@ -180,6 +180,10 @@ struct f_owner_ex { blocking */ #define LOCK_UN 8 /* remove lock */ +/* + * LOCK_MAND support has been removed from the kernel. We leave the symbols + * here to not break legacy builds, but these should not be used in new code. + */ #define LOCK_MAND 32 /* This is a mandatory flock ... */ #define LOCK_READ 64 /* which allows concurrent read operations */ #define LOCK_WRITE 128 /* which allows concurrent write operations */ @@ -188,24 +192,19 @@ struct f_owner_ex { #define F_LINUX_SPECIFIC_BASE 1024 #ifndef HAVE_ARCH_STRUCT_FLOCK -#ifndef __ARCH_FLOCK_PAD -#define __ARCH_FLOCK_PAD -#endif - struct flock { short l_type; short l_whence; __kernel_off_t l_start; __kernel_off_t l_len; __kernel_pid_t l_pid; - __ARCH_FLOCK_PAD -}; +#ifdef __ARCH_FLOCK_EXTRA_SYSID + __ARCH_FLOCK_EXTRA_SYSID #endif - -#ifndef HAVE_ARCH_STRUCT_FLOCK64 -#ifndef __ARCH_FLOCK64_PAD -#define __ARCH_FLOCK64_PAD +#ifdef __ARCH_FLOCK_PAD + __ARCH_FLOCK_PAD #endif +}; struct flock64 { short l_type; @@ -213,8 +212,10 @@ struct flock64 { __kernel_loff_t l_start; __kernel_loff_t l_len; __kernel_pid_t l_pid; +#ifdef __ARCH_FLOCK64_PAD __ARCH_FLOCK64_PAD -}; #endif +}; +#endif /* HAVE_ARCH_STRUCT_FLOCK */ #endif /* _ASM_GENERIC_FCNTL_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index f94f65d429be..6ce1f1ceb432 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -72,6 +72,13 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + +#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 77f7c1638eb1..54d9c8bf7c55 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -119,6 +119,11 @@ #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_RCVMARK 75 + +#define SO_PASSPIDFD 76 +#define SO_PEERPIDFD 77 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f2b5d72a46c2..75f00965ab15 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -38,12 +38,12 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy) __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_getevents 4 __SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents) #endif -/* fs/xattr.c */ #define __NR_setxattr 5 __SYSCALL(__NR_setxattr, sys_setxattr) #define __NR_lsetxattr 6 @@ -68,58 +68,38 @@ __SYSCALL(__NR_removexattr, sys_removexattr) __SYSCALL(__NR_lremovexattr, sys_lremovexattr) #define __NR_fremovexattr 16 __SYSCALL(__NR_fremovexattr, sys_fremovexattr) - -/* fs/dcache.c */ #define __NR_getcwd 17 __SYSCALL(__NR_getcwd, sys_getcwd) - -/* fs/cookies.c */ #define __NR_lookup_dcookie 18 -__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) - -/* fs/eventfd.c */ +__SYSCALL(__NR_lookup_dcookie, sys_ni_syscall) #define __NR_eventfd2 19 __SYSCALL(__NR_eventfd2, sys_eventfd2) - -/* fs/eventpoll.c */ #define __NR_epoll_create1 20 __SYSCALL(__NR_epoll_create1, sys_epoll_create1) #define __NR_epoll_ctl 21 __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) #define __NR_epoll_pwait 22 __SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait) - -/* fs/fcntl.c */ #define __NR_dup 23 __SYSCALL(__NR_dup, sys_dup) #define __NR_dup3 24 __SYSCALL(__NR_dup3, sys_dup3) #define __NR3264_fcntl 25 __SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64) - -/* fs/inotify_user.c */ #define __NR_inotify_init1 26 __SYSCALL(__NR_inotify_init1, sys_inotify_init1) #define __NR_inotify_add_watch 27 __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 28 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) - -/* fs/ioctl.c */ #define __NR_ioctl 29 __SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl) - -/* fs/ioprio.c */ #define __NR_ioprio_set 30 __SYSCALL(__NR_ioprio_set, sys_ioprio_set) #define __NR_ioprio_get 31 __SYSCALL(__NR_ioprio_get, sys_ioprio_get) - -/* fs/locks.c */ #define __NR_flock 32 __SYSCALL(__NR_flock, sys_flock) - -/* fs/namei.c */ #define __NR_mknodat 33 __SYSCALL(__NR_mknodat, sys_mknodat) #define __NR_mkdirat 34 @@ -130,25 +110,21 @@ __SYSCALL(__NR_unlinkat, sys_unlinkat) __SYSCALL(__NR_symlinkat, sys_symlinkat) #define __NR_linkat 37 __SYSCALL(__NR_linkat, sys_linkat) + #ifdef __ARCH_WANT_RENAMEAT /* renameat is superseded with flags by renameat2 */ #define __NR_renameat 38 __SYSCALL(__NR_renameat, sys_renameat) #endif /* __ARCH_WANT_RENAMEAT */ -/* fs/namespace.c */ #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 __SYSCALL(__NR_mount, sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) - -/* fs/nfsctl.c */ #define __NR_nfsservctl 42 __SYSCALL(__NR_nfsservctl, sys_ni_syscall) - -/* fs/open.c */ #define __NR3264_statfs 43 __SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \ compat_sys_statfs64) @@ -161,7 +137,6 @@ __SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \ #define __NR3264_ftruncate 46 __SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \ compat_sys_ftruncate64) - #define __NR_fallocate 47 __SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate) #define __NR_faccessat 48 @@ -186,20 +161,12 @@ __SYSCALL(__NR_openat, sys_openat) __SYSCALL(__NR_close, sys_close) #define __NR_vhangup 58 __SYSCALL(__NR_vhangup, sys_vhangup) - -/* fs/pipe.c */ #define __NR_pipe2 59 __SYSCALL(__NR_pipe2, sys_pipe2) - -/* fs/quota.c */ #define __NR_quotactl 60 __SYSCALL(__NR_quotactl, sys_quotactl) - -/* fs/readdir.c */ #define __NR_getdents64 61 __SYSCALL(__NR_getdents64, sys_getdents64) - -/* fs/read_write.c */ #define __NR3264_lseek 62 __SC_3264(__NR3264_lseek, sys_llseek, sys_lseek) #define __NR_read 63 @@ -218,12 +185,9 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64) __SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv) #define __NR_pwritev 70 __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev) - -/* fs/sendfile.c */ #define __NR3264_sendfile 71 __SYSCALL(__NR3264_sendfile, sys_sendfile64) -/* fs/select.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_pselect6 72 __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32) @@ -231,21 +195,17 @@ __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_psel __SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32) #endif -/* fs/signalfd.c */ #define __NR_signalfd4 74 __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) - -/* fs/splice.c */ #define __NR_vmsplice 75 __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 __SYSCALL(__NR_tee, sys_tee) - -/* fs/stat.c */ #define __NR_readlinkat 78 __SYSCALL(__NR_readlinkat, sys_readlinkat) + #if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64) #define __NR3264_fstatat 79 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) @@ -253,13 +213,13 @@ __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat) #endif -/* fs/sync.c */ #define __NR_sync 81 __SYSCALL(__NR_sync, sys_sync) #define __NR_fsync 82 __SYSCALL(__NR_fsync, sys_fsync) #define __NR_fdatasync 83 __SYSCALL(__NR_fdatasync, sys_fdatasync) + #ifdef __ARCH_WANT_SYNC_FILE_RANGE2 #define __NR_sync_file_range2 84 __SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \ @@ -270,9 +230,9 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ compat_sys_sync_file_range) #endif -/* fs/timerfd.c */ #define __NR_timerfd_create 85 __SYSCALL(__NR_timerfd_create, sys_timerfd_create) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timerfd_settime 86 __SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \ @@ -282,45 +242,35 @@ __SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \ sys_timerfd_gettime) #endif -/* fs/utimes.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_utimensat 88 __SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat) #endif -/* kernel/acct.c */ #define __NR_acct 89 __SYSCALL(__NR_acct, sys_acct) - -/* kernel/capability.c */ #define __NR_capget 90 __SYSCALL(__NR_capget, sys_capget) #define __NR_capset 91 __SYSCALL(__NR_capset, sys_capset) - -/* kernel/exec_domain.c */ #define __NR_personality 92 __SYSCALL(__NR_personality, sys_personality) - -/* kernel/exit.c */ #define __NR_exit 93 __SYSCALL(__NR_exit, sys_exit) #define __NR_exit_group 94 __SYSCALL(__NR_exit_group, sys_exit_group) #define __NR_waitid 95 __SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid) - -/* kernel/fork.c */ #define __NR_set_tid_address 96 __SYSCALL(__NR_set_tid_address, sys_set_tid_address) #define __NR_unshare 97 __SYSCALL(__NR_unshare, sys_unshare) -/* kernel/futex.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_futex 98 __SC_3264(__NR_futex, sys_futex_time32, sys_futex) #endif + #define __NR_set_robust_list 99 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ compat_sys_set_robust_list) @@ -328,43 +278,40 @@ __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ compat_sys_get_robust_list) -/* kernel/hrtimer.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_nanosleep 101 __SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep) #endif -/* kernel/itimer.c */ #define __NR_getitimer 102 __SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer) #define __NR_setitimer 103 __SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer) - -/* kernel/kexec.c */ #define __NR_kexec_load 104 __SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load) - -/* kernel/module.c */ #define __NR_init_module 105 __SYSCALL(__NR_init_module, sys_init_module) #define __NR_delete_module 106 __SYSCALL(__NR_delete_module, sys_delete_module) - -/* kernel/posix-timers.c */ #define __NR_timer_create 107 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_gettime 108 __SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime) #endif + #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_settime 110 __SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime) #endif + #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_settime 112 __SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime) @@ -377,15 +324,10 @@ __SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \ sys_clock_nanosleep) #endif -/* kernel/printk.c */ #define __NR_syslog 116 __SYSCALL(__NR_syslog, sys_syslog) - -/* kernel/ptrace.c */ #define __NR_ptrace 117 -__SYSCALL(__NR_ptrace, sys_ptrace) - -/* kernel/sched/core.c */ +__SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace) #define __NR_sched_setparam 118 __SYSCALL(__NR_sched_setparam, sys_sched_setparam) #define __NR_sched_setscheduler 119 @@ -406,13 +348,13 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield) __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 126 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_sched_rr_get_interval 127 __SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \ sys_sched_rr_get_interval) #endif -/* kernel/signal.c */ #define __NR_restart_syscall 128 __SYSCALL(__NR_restart_syscall, sys_restart_syscall) #define __NR_kill 129 @@ -431,18 +373,18 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction) __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 136 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_rt_sigtimedwait 137 __SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \ sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) #endif + #define __NR_rt_sigqueueinfo 138 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ compat_sys_rt_sigqueueinfo) #define __NR_rt_sigreturn 139 __SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn) - -/* kernel/sys.c */ #define __NR_setpriority 140 __SYSCALL(__NR_setpriority, sys_setpriority) #define __NR_getpriority 141 @@ -507,7 +449,6 @@ __SYSCALL(__NR_prctl, sys_prctl) #define __NR_getcpu 168 __SYSCALL(__NR_getcpu, sys_getcpu) -/* kernel/time.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_gettimeofday 169 __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) @@ -517,7 +458,6 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex) #endif -/* kernel/timer.c */ #define __NR_getpid 172 __SYSCALL(__NR_getpid, sys_getpid) #define __NR_getppid 173 @@ -534,12 +474,11 @@ __SYSCALL(__NR_getegid, sys_getegid) __SYSCALL(__NR_gettid, sys_gettid) #define __NR_sysinfo 179 __SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo) - -/* ipc/mqueue.c */ #define __NR_mq_open 180 __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_mq_timedsend 182 __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend) @@ -547,12 +486,11 @@ __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend) __SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \ sys_mq_timedreceive) #endif + #define __NR_mq_notify 184 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 __SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr) - -/* ipc/msg.c */ #define __NR_msgget 186 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 187 @@ -561,20 +499,18 @@ __SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl) __SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv) #define __NR_msgsnd 189 __SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd) - -/* ipc/sem.c */ #define __NR_semget 190 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 __SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop) #endif + #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) - -/* ipc/shm.c */ #define __NR_shmget 194 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 195 @@ -583,8 +519,6 @@ __SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl) __SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat) #define __NR_shmdt 197 __SYSCALL(__NR_shmdt, sys_shmdt) - -/* net/socket.c */ #define __NR_socket 198 __SYSCALL(__NR_socket, sys_socket) #define __NR_socketpair 199 @@ -615,40 +549,30 @@ __SYSCALL(__NR_shutdown, sys_shutdown) __SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg) #define __NR_recvmsg 212 __SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg) - -/* mm/filemap.c */ #define __NR_readahead 213 __SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead) - -/* mm/nommu.c, also with MMU */ #define __NR_brk 214 __SYSCALL(__NR_brk, sys_brk) #define __NR_munmap 215 __SYSCALL(__NR_munmap, sys_munmap) #define __NR_mremap 216 __SYSCALL(__NR_mremap, sys_mremap) - -/* security/keys/keyctl.c */ #define __NR_add_key 217 __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 218 __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 219 __SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl) - -/* arch/example/kernel/sys_example.c */ #define __NR_clone 220 __SYSCALL(__NR_clone, sys_clone) #define __NR_execve 221 __SC_COMP(__NR_execve, sys_execve, compat_sys_execve) - #define __NR3264_mmap 222 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap) -/* mm/fadvise.c */ #define __NR3264_fadvise64 223 __SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64) -/* mm/, CONFIG_MMU only */ +/* CONFIG_MMU only */ #ifndef __ARCH_NOMMU #define __NR_swapon 224 __SYSCALL(__NR_swapon, sys_swapon) @@ -673,15 +597,15 @@ __SYSCALL(__NR_madvise, sys_madvise) #define __NR_remap_file_pages 234 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) #define __NR_mbind 235 -__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 236 -__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 237 -__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_migrate_pages 238 -__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_move_pages 239 -__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #endif #define __NR_rt_tgsigqueueinfo 240 @@ -691,6 +615,7 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_recvmmsg 243 __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32) @@ -706,6 +631,7 @@ __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recv #define __NR_wait4 260 __SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4) #endif + #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) #define __NR_fanotify_init 262 @@ -716,10 +642,12 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 265 __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_adjtime 266 __SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime) #endif + #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 @@ -770,16 +698,20 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) __SYSCALL(__NR_pkey_free, sys_pkey_free) #define __NR_statx 291 __SYSCALL(__NR_statx, sys_statx) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_pgetevents 292 __SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents) #endif + #define __NR_rseq 293 __SYSCALL(__NR_rseq, sys_rseq) #define __NR_kexec_file_load 294 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) + /* 295 through 402 are unassigned to sync up with generic numbers, don't use */ -#if __BITS_PER_LONG == 32 + +#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32 #define __NR_clock_gettime64 403 __SYSCALL(__NR_clock_gettime64, sys_clock_gettime) #define __NR_clock_settime64 404 @@ -844,22 +776,74 @@ __SYSCALL(__NR_fsmount, sys_fsmount) __SYSCALL(__NR_fspick, sys_fspick) #define __NR_pidfd_open 434 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) + #ifdef __ARCH_WANT_SYS_CLONE3 #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) #endif + #define __NR_close_range 436 __SYSCALL(__NR_close_range, sys_close_range) - #define __NR_openat2 437 __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) #define __NR_faccessat2 439 __SYSCALL(__NR_faccessat2, sys_faccessat2) +#define __NR_process_madvise 440 +__SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_epoll_pwait2 441 +__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) + +#ifdef __ARCH_WANT_MEMFD_SECRET +#define __NR_memfd_secret 447 +__SYSCALL(__NR_memfd_secret, sys_memfd_secret) +#endif + +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) +#define __NR_futex_waitv 449 +__SYSCALL(__NR_futex_waitv, sys_futex_waitv) +#define __NR_set_mempolicy_home_node 450 +__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_cachestat 451 +__SYSCALL(__NR_cachestat, sys_cachestat) +#define __NR_fchmodat2 452 +__SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_map_shadow_stack 453 +__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) + +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) + +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) + +#define __NR_lsm_get_self_attr 459 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 460 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 461 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #undef __NR_syscalls -#define __NR_syscalls 440 +#define __NR_syscalls 462 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index edba4d93e9e6..c65267afc341 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #if defined(__i386__) || defined(__x86_64__) #include "../../../arch/x86/include/uapi/asm/bitsperlong.h" -#elif defined(__aarch64__) -#include "../../../arch/arm64/include/uapi/asm/bitsperlong.h" #elif defined(__powerpc__) #include "../../../arch/powerpc/include/uapi/asm/bitsperlong.h" #elif defined(__s390__) @@ -13,8 +11,6 @@ #include "../../../arch/mips/include/uapi/asm/bitsperlong.h" #elif defined(__ia64__) #include "../../../arch/ia64/include/uapi/asm/bitsperlong.h" -#elif defined(__riscv) -#include "../../../arch/riscv/include/uapi/asm/bitsperlong.h" #elif defined(__alpha__) #include "../../../arch/alpha/include/uapi/asm/bitsperlong.h" #else diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h index 39acc149d843..ff52668abf8c 100644 --- a/tools/include/uapi/asm/bpf_perf_event.h +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -1,9 +1,13 @@ #if defined(__aarch64__) #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" +#elif defined(__arc__) +#include "../../arch/arc/include/uapi/asm/bpf_perf_event.h" #elif defined(__s390__) #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" #elif defined(__riscv) #include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h" +#elif defined(__loongarch__) +#include "../../arch/loongarch/include/uapi/asm/bpf_perf_event.h" #else #include <uapi/asm-generic/bpf_perf_event.h> #endif diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index 637189ec1ab9..869379f91fe4 100644 --- a/tools/include/uapi/asm/errno.h +++ b/tools/include/uapi/asm/errno.h @@ -9,10 +9,8 @@ #include "../../../arch/alpha/include/uapi/asm/errno.h" #elif defined(__mips__) #include "../../../arch/mips/include/uapi/asm/errno.h" -#elif defined(__ia64__) -#include "../../../arch/ia64/include/uapi/asm/errno.h" -#elif defined(__xtensa__) -#include "../../../arch/xtensa/include/uapi/asm/errno.h" +#elif defined(__hppa__) +#include "../../../arch/parisc/include/uapi/asm/errno.h" #else #include <asm-generic/errno.h> #endif diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 808b48a93330..16122819edfe 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1,11 +1,10 @@ -/** - * \file drm.h +/* * Header for the Direct Rendering Manager * - * \author Rickard E. (Rik) Faith <faith@valinux.com> + * Author: Rickard E. (Rik) Faith <faith@valinux.com> * - * \par Acknowledgments: - * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg. + * Acknowledgments: + * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg. */ /* @@ -85,7 +84,7 @@ typedef unsigned int drm_context_t; typedef unsigned int drm_drawable_t; typedef unsigned int drm_magic_t; -/** +/* * Cliprect. * * \warning: If you change this structure, make sure you change @@ -101,7 +100,7 @@ struct drm_clip_rect { unsigned short y2; }; -/** +/* * Drawable information. */ struct drm_drawable_info { @@ -109,7 +108,7 @@ struct drm_drawable_info { struct drm_clip_rect *rects; }; -/** +/* * Texture region, */ struct drm_tex_region { @@ -120,7 +119,7 @@ struct drm_tex_region { unsigned int age; }; -/** +/* * Hardware lock. * * The lock structure is a simple cache-line aligned integer. To avoid @@ -132,7 +131,7 @@ struct drm_hw_lock { char padding[60]; /**< Pad to cache line */ }; -/** +/* * DRM_IOCTL_VERSION ioctl argument type. * * \sa drmGetVersion(). @@ -149,7 +148,7 @@ struct drm_version { char __user *desc; /**< User-space buffer to hold desc */ }; -/** +/* * DRM_IOCTL_GET_UNIQUE ioctl argument type. * * \sa drmGetBusid() and drmSetBusId(). @@ -168,7 +167,7 @@ struct drm_block { int unused; }; -/** +/* * DRM_IOCTL_CONTROL ioctl argument type. * * \sa drmCtlInstHandler() and drmCtlUninstHandler(). @@ -183,7 +182,7 @@ struct drm_control { int irq; }; -/** +/* * Type of memory to map. */ enum drm_map_type { @@ -195,7 +194,7 @@ enum drm_map_type { _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ }; -/** +/* * Memory mapping flags. */ enum drm_map_flags { @@ -214,7 +213,7 @@ struct drm_ctx_priv_map { void *handle; /**< Handle of map */ }; -/** +/* * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls * argument type. * @@ -231,7 +230,7 @@ struct drm_map { /* Private data */ }; -/** +/* * DRM_IOCTL_GET_CLIENT ioctl argument type. */ struct drm_client { @@ -263,7 +262,7 @@ enum drm_stat_type { /* Add to the *END* of the list */ }; -/** +/* * DRM_IOCTL_GET_STATS ioctl argument type. */ struct drm_stats { @@ -274,7 +273,7 @@ struct drm_stats { } data[15]; }; -/** +/* * Hardware locking flags. */ enum drm_lock_flags { @@ -289,7 +288,7 @@ enum drm_lock_flags { _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ }; -/** +/* * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. * * \sa drmGetLock() and drmUnlock(). @@ -299,7 +298,7 @@ struct drm_lock { enum drm_lock_flags flags; }; -/** +/* * DMA flags * * \warning @@ -328,7 +327,7 @@ enum drm_dma_flags { _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ }; -/** +/* * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. * * \sa drmAddBufs(). @@ -351,7 +350,7 @@ struct drm_buf_desc { */ }; -/** +/* * DRM_IOCTL_INFO_BUFS ioctl argument type. */ struct drm_buf_info { @@ -359,7 +358,7 @@ struct drm_buf_info { struct drm_buf_desc __user *list; }; -/** +/* * DRM_IOCTL_FREE_BUFS ioctl argument type. */ struct drm_buf_free { @@ -367,7 +366,7 @@ struct drm_buf_free { int __user *list; }; -/** +/* * Buffer information * * \sa drm_buf_map. @@ -379,7 +378,7 @@ struct drm_buf_pub { void __user *address; /**< Address of buffer */ }; -/** +/* * DRM_IOCTL_MAP_BUFS ioctl argument type. */ struct drm_buf_map { @@ -392,7 +391,7 @@ struct drm_buf_map { struct drm_buf_pub __user *list; /**< Buffer information */ }; -/** +/* * DRM_IOCTL_DMA ioctl argument type. * * Indices here refer to the offset into the buffer list in drm_buf_get. @@ -417,7 +416,7 @@ enum drm_ctx_flags { _DRM_CONTEXT_2DONLY = 0x02 }; -/** +/* * DRM_IOCTL_ADD_CTX ioctl argument type. * * \sa drmCreateContext() and drmDestroyContext(). @@ -427,7 +426,7 @@ struct drm_ctx { enum drm_ctx_flags flags; }; -/** +/* * DRM_IOCTL_RES_CTX ioctl argument type. */ struct drm_ctx_res { @@ -435,14 +434,14 @@ struct drm_ctx_res { struct drm_ctx __user *contexts; }; -/** +/* * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. */ struct drm_draw { drm_drawable_t handle; }; -/** +/* * DRM_IOCTL_UPDATE_DRAW ioctl argument type. */ typedef enum { @@ -456,14 +455,14 @@ struct drm_update_draw { unsigned long long data; }; -/** +/* * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. */ struct drm_auth { drm_magic_t magic; }; -/** +/* * DRM_IOCTL_IRQ_BUSID ioctl argument type. * * \sa drmGetInterruptFromBusID(). @@ -505,7 +504,7 @@ struct drm_wait_vblank_reply { long tval_usec; }; -/** +/* * DRM_IOCTL_WAIT_VBLANK ioctl argument type. * * \sa drmWaitVBlank(). @@ -518,7 +517,7 @@ union drm_wait_vblank { #define _DRM_PRE_MODESET 1 #define _DRM_POST_MODESET 2 -/** +/* * DRM_IOCTL_MODESET_CTL ioctl argument type * * \sa drmModesetCtl(). @@ -528,7 +527,7 @@ struct drm_modeset_ctl { __u32 cmd; }; -/** +/* * DRM_IOCTL_AGP_ENABLE ioctl argument type. * * \sa drmAgpEnable(). @@ -537,7 +536,7 @@ struct drm_agp_mode { unsigned long mode; /**< AGP mode */ }; -/** +/* * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. * * \sa drmAgpAlloc() and drmAgpFree(). @@ -549,7 +548,7 @@ struct drm_agp_buffer { unsigned long physical; /**< Physical used by i810 */ }; -/** +/* * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. * * \sa drmAgpBind() and drmAgpUnbind(). @@ -559,7 +558,7 @@ struct drm_agp_binding { unsigned long offset; /**< In bytes -- will round to page boundary */ }; -/** +/* * DRM_IOCTL_AGP_INFO ioctl argument type. * * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), @@ -580,7 +579,7 @@ struct drm_agp_info { unsigned short id_device; }; -/** +/* * DRM_IOCTL_SG_ALLOC ioctl argument type. */ struct drm_scatter_gather { @@ -588,7 +587,7 @@ struct drm_scatter_gather { unsigned long handle; /**< Used for mapping / unmapping */ }; -/** +/* * DRM_IOCTL_SET_VERSION ioctl argument type. */ struct drm_set_version { @@ -598,14 +597,14 @@ struct drm_set_version { int drm_dd_minor; }; -/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ struct drm_gem_close { /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ struct drm_gem_flink { /** Handle for the object being named */ __u32 handle; @@ -614,7 +613,7 @@ struct drm_gem_flink { __u32 name; }; -/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ struct drm_gem_open { /** Name of object being opened */ __u32 name; @@ -626,33 +625,164 @@ struct drm_gem_open { __u64 size; }; +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ #define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` + * in the high bits of &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ #define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. + */ #define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ #define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ #define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ #define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy + * page-flips. + */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/* - * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight - * combination for the hardware cursor. The intention is that a hardware - * agnostic userspace can query a cursor plane size to use. +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. * * Note that the cross-driver contract is to merely return a valid size; * drivers are free to attach another meaning on top, eg. i915 returns the * maximum plane size. */ #define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ #define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ #define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ #define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. + */ #define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * :ref:`drm_sync_objects`. + */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 +/** + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic + * commits. + */ +#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 -/** DRM_IOCTL_GET_CAP ioctl argument type */ +/* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { __u64 capability; __u64 value; @@ -661,9 +791,12 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_STEREO_3D * - * if set to 1, the DRM core will expose the stereo 3D capabilities of the + * If set to 1, the DRM core will expose the stereo 3D capabilities of the * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. */ #define DRM_CLIENT_CAP_STEREO_3D 1 @@ -672,13 +805,25 @@ struct drm_get_cap { * * If set to 1, the DRM core will expose all planes (overlay, primary, and * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. */ #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 /** * DRM_CLIENT_CAP_ATOMIC * - * If set to 1, the DRM core will expose atomic properties to userspace + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -686,6 +831,10 @@ struct drm_get_cap { * DRM_CLIENT_CAP_ASPECT_RATIO * * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. */ #define DRM_CLIENT_CAP_ASPECT_RATIO 4 @@ -693,12 +842,40 @@ struct drm_get_cap { * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS * * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. Depends on client - * also supporting DRM_CLIENT_CAP_ATOMIC + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. + * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +/** + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT + * + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and + * virtualbox) have additional restrictions for cursor planes (thus + * making cursor planes on those drivers not truly universal,) e.g. + * they need cursor planes to act like one would expect from a mouse + * cursor and have correctly set hotspot properties. + * If this client cap is not set the DRM core will hide cursor plane on + * those virtualized drivers because not setting it implies that the + * client is not capable of dealing with those extra restictions. + * Clients which do set cursor hotspot and treat the cursor plane + * like a mouse cursor should set this property. + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + * + * Setting this property on drivers which do not special case + * cursor planes (i.e. non-virtualized drivers) will return + * EOPNOTSUPP, which can be used by userspace to gauge + * requirements of the hardware/drivers they're running on. + * + * This capability is always supported for atomic-capable virtualized + * drivers starting from kernel version 6.6. + */ +#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 + +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; __u64 value; @@ -749,6 +926,7 @@ struct drm_syncobj_transfer { #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ @@ -757,6 +935,14 @@ struct drm_syncobj_wait { __u32 flags; __u32 first_signaled; /* only valid when not waiting all */ __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; }; struct drm_syncobj_timeline_wait { @@ -769,6 +955,35 @@ struct drm_syncobj_timeline_wait { __u32 flags; __u32 first_signaled; /* only valid when not waiting all */ __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to sent events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; }; @@ -834,6 +1049,19 @@ extern "C" { #define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) #define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) #define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +/** + * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. + * + * GEM handles are not reference-counted by the kernel. User-space is + * responsible for managing their lifetime. For example, if user-space imports + * the same memory object twice on the same DRM file description, the same GEM + * handle is returned by both imports, and user-space needs to ensure + * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen + * when a memory object is allocated, then exported and imported again on the + * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception + * and always returns fresh new GEM handles even if an existing GEM handle + * already refers to the same memory object before the IOCTL is performed. + */ #define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) #define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) #define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) @@ -874,7 +1102,37 @@ extern "C" { #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ #define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. + */ #define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) #define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) @@ -912,10 +1170,40 @@ extern "C" { #define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. + * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. + */ #define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) #define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) #define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) @@ -948,9 +1236,59 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. + */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + /** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. + * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + +/* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. * Generic IOCTLS restart at 0xA0. @@ -962,24 +1300,49 @@ extern "C" { #define DRM_COMMAND_END 0xA0 /** - * Header for events written back to userspace on the drm fd. The - * type defines the type of event, the length specifies the total - * length of the event (including the header), and user_data is - * typically a 64 bit value passed with the ioctl that triggered the - * event. A read on the drm fd will always only return complete - * events, that is, if for example the read buffer is 100 bytes, and - * there are two 64 byte events pending, only one will be returned. + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). + * + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. * - * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and - * up are chipset specific. + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. */ struct drm_event { __u32 type; __u32 length; }; +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ #define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 00546062e023..fd4f9574d177 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -38,13 +38,13 @@ extern "C" { */ /** - * DOC: uevents generated by i915 on it's device node + * DOC: uevents generated by i915 on its device node * * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch - * event from the gpu l3 cache. Additional information supplied is ROW, + * event from the GPU L3 cache. Additional information supplied is ROW, * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep - * track of these events and if a specific cache-line seems to have a - * persistent error remap it with the l3 remapping tool supplied in + * track of these events, and if a specific cache-line seems to have a + * persistent error, remap it with the L3 remapping tool supplied in * intel-gpu-tools. The value supplied with the event is always 1. * * I915_ERROR_UEVENT - Generated upon error detection, currently only via @@ -62,8 +62,8 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" -/* - * i915_user_extension: Base class for defining a chain of extensions +/** + * struct i915_user_extension - Base class for defining a chain of extensions * * Many interfaces need to grow over time. In most cases we can simply * extend the struct and have userspace pass in more data. Another option, @@ -76,12 +76,58 @@ extern "C" { * increasing complexity, and for large parts of that interface to be * entirely optional. The downside is more pointer chasing; chasing across * the __user boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * */ struct i915_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ __u32 name; - __u32 flags; /* All undefined bits must be zero. */ - __u32 rsvd[4]; /* Reserved for future use; must be zero. */ + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 flags; + /** + * @rsvd: MBZ + * + * Reserved for future use; must be zero. + */ + __u32 rsvd[4]; }; /* @@ -108,25 +154,77 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; -/* +/** + * enum drm_i915_gem_engine_class - uapi engine type enumeration + * * Different engines serve different roles, and there may be more than one - * engine serving each role. enum drm_i915_gem_engine_class provides a - * classification of the role of the engine, which may be used when requesting - * operations to be performed on a certain subset of engines, or for providing - * information about that group. + * engine serving each role. This enum provides a classification of the role + * of the engine, which may be used when requesting operations to be performed + * on a certain subset of engines, or for providing information about that + * group. */ enum drm_i915_gem_engine_class { + /** + * @I915_ENGINE_CLASS_RENDER: + * + * Render engines support instructions used for 3D, Compute (GPGPU), + * and programmable media workloads. These instructions fetch data and + * dispatch individual work items to threads that operate in parallel. + * The threads run small programs (called "kernels" or "shaders") on + * the GPU's execution units (EUs). + */ I915_ENGINE_CLASS_RENDER = 0, + + /** + * @I915_ENGINE_CLASS_COPY: + * + * Copy engines (also referred to as "blitters") support instructions + * that move blocks of data from one location in memory to another, + * or that fill a specified location of memory with fixed data. + * Copy engines can perform pre-defined logical or bitwise operations + * on the source, destination, or pattern data. + */ I915_ENGINE_CLASS_COPY = 1, + + /** + * @I915_ENGINE_CLASS_VIDEO: + * + * Video engines (also referred to as "bit stream decode" (BSD) or + * "vdbox") support instructions that perform fixed-function media + * decode and encode. + */ I915_ENGINE_CLASS_VIDEO = 2, + + /** + * @I915_ENGINE_CLASS_VIDEO_ENHANCE: + * + * Video enhancement engines (also referred to as "vebox") support + * instructions related to image enhancement. + */ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - /* should be kept compact */ + /** + * @I915_ENGINE_CLASS_COMPUTE: + * + * Compute engines support a subset of the instructions available + * on render engines: compute engines support Compute (GPGPU) and + * programmable media workloads, but do not support the 3D pipeline. + */ + I915_ENGINE_CLASS_COMPUTE = 4, + + /* Values in this enum should be kept compact. */ + /** + * @I915_ENGINE_CLASS_INVALID: + * + * Placeholder value to represent an invalid engine class assignment. + */ I915_ENGINE_CLASS_INVALID = -1 }; -/* +/** + * struct i915_engine_class_instance - Engine class/instance identifier + * * There may be more than one engine fulfilling any role within the system. * Each engine of a class is given a unique instance number and therefore * any engine can be specified by its class:instance tuplet. APIs that allow @@ -134,10 +232,21 @@ enum drm_i915_gem_engine_class { * for this identification. */ struct i915_engine_class_instance { - __u16 engine_class; /* see enum drm_i915_gem_engine_class */ - __u16 engine_instance; + /** + * @engine_class: + * + * Engine class from enum drm_i915_gem_engine_class + */ + __u16 engine_class; #define I915_ENGINE_CLASS_INVALID_NONE -1 #define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 + + /** + * @engine_instance: + * + * Engine instance. + */ + __u16 engine_instance; }; /** @@ -171,14 +280,30 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ENGINE_SEMA(class, instance) \ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) -#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) +/* + * Top 4 bits of every non-engine counter are GT id. + */ +#define __I915_PMU_GT_SHIFT (60) + +#define ___I915_PMU_OTHER(gt, x) \ + (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \ + ((__u64)(gt) << __I915_PMU_GT_SHIFT)) + +#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x) #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) + +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0) +#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1) +#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2) +#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3) +#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4) /* Each region is a minimum of 16k, and there are at most 255 of them. */ @@ -359,6 +484,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_QUERY 0x39 #define DRM_I915_GEM_VM_CREATE 0x3a #define DRM_I915_GEM_VM_DESTROY 0x3b +#define DRM_I915_GEM_CREATE_EXT 0x3c /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -391,6 +517,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) #define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) #define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext) #define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) @@ -523,7 +650,33 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) +/* + * Query the status of HuC load. + * + * The query can fail in the following scenarios with the listed error codes: + * -ENODEV if HuC is not present on this platform, + * -EOPNOTSUPP if HuC firmware usage is disabled, + * -ENOPKG if HuC firmware fetch failed, + * -ENOEXEC if HuC firmware is invalid or mismatched, + * -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC, + * -EIO if the FW transfer or the FW authentication failed. + * + * If the IOCTL is successful, the returned parameter will be set to one of the + * following values: + * * 0 if HuC firmware load is not complete, + * * 1 if HuC firmware is loaded and fully authenticated, + * * 2 if HuC firmware is loaded and authenticated for clear media only + */ #define I915_PARAM_HUC_STATUS 42 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of @@ -540,7 +693,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_FENCE 44 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture - * user specified bufffers for post-mortem debugging of GPU hangs. See + * user-specified buffers for post-mortem debugging of GPU hangs. See * EXEC_OBJECT_CAPTURE. */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 @@ -619,16 +772,63 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + +/* + * Frequency of the timestamps in OA reports. This used to be the same as the CS + * timestamp frequency, but differs on some platforms. + */ +#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57 + +/* + * Query the status of PXP support in i915. + * + * The query can fail in the following scenarios with the listed error codes: + * -ENODEV = PXP support is not available on the GPU device or in the + * kernel due to missing component drivers or kernel configs. + * + * If the IOCTL is successful, the returned parameter will be set to one of + * the following values: + * 1 = PXP feature is supported and is ready for use. + * 2 = PXP feature is supported but should be ready soon (pending + * initialization of non-i915 system dependencies). + * + * NOTE: When param is supported (positive return values), user space should + * still refer to the GEM PXP context-creation UAPI header specs to be + * aware of possible failure due to system state machine at the time. + */ +#define I915_PARAM_PXP_STATUS 58 + /* Must be kept compact -- no holes and well documented */ -typedef struct drm_i915_getparam { +/** + * struct drm_i915_getparam - Driver parameter query structure. + */ +struct drm_i915_getparam { + /** @param: Driver parameter to query. */ __s32 param; - /* + + /** + * @value: Address of memory where queried value should be put. + * * WARNING: Using pointers instead of fixed-size u64 means we need to write * compat32 code. Don't repeat this mistake. */ int __user *value; -} drm_i915_getparam_t; +}; + +/** + * typedef drm_i915_getparam_t - Driver parameter query structure. + * See struct drm_i915_getparam. + */ +typedef struct drm_i915_getparam drm_i915_getparam_t; /* Ioctl to set kernel params: */ @@ -794,45 +994,113 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: + * + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. */ __u64 extensions; }; +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; @@ -936,6 +1204,7 @@ struct drm_i915_gem_exec_object { __u64 offset; }; +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ struct drm_i915_gem_execbuffer { /** * List of buffers to be validated with their relocations to be @@ -982,10 +1251,16 @@ struct drm_i915_gem_exec_object2 { /** * When the EXEC_OBJECT_PINNED flag is specified this is populated by * the user with the GTT offset at which this object will be pinned. + * * When the I915_EXEC_NO_RELOC flag is specified this must contain the * presumed_offset of the object. + * * During execbuffer2 the kernel populates it with the value of the * current GTT offset of the object, for future presumed_offset writes. + * + * See struct drm_i915_gem_create_ext for the rules when dealing with + * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with + * minimum page sizes, like DG2. */ __u64 offset; @@ -1034,38 +1309,119 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +/** + * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf + * ioctl. + * + * The request will wait for input fence to signal before submission. + * + * The returned output fence will be signaled after the completion of the + * request. + */ struct drm_i915_gem_exec_fence { - /** - * User's handle for a drm_syncobj to wait on or signal. - */ + /** @handle: User's handle for a drm_syncobj to wait on or signal. */ __u32 handle; + /** + * @flags: Supported flags are: + * + * I915_EXEC_FENCE_WAIT: + * Wait for the input fence before request submission. + * + * I915_EXEC_FENCE_SIGNAL: + * Return request completion fence as output + */ + __u32 flags; #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) #define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) - __u32 flags; }; -struct drm_i915_gem_execbuffer2 { +/** + * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences + * for execbuf ioctl. + * + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** + * @fence_count: Number of elements in the @handles_ptr & @value_ptr + * arrays. + */ + __u64 fence_count; + + /** + * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence + * of length @fence_count. + */ + __u64 handles_ptr; + /** - * List of gem_exec_object2 structs + * @values_ptr: Pointer to an array of u64 values of length + * @fence_count. + * Values must be 0 for a binary drm_syncobj. A Value of 0 for a + * timeline drm_syncobj is invalid as it turns a drm_syncobj into a + * binary one. */ + __u64 values_ptr; +}; + +/** + * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2 + * ioctl. + */ +struct drm_i915_gem_execbuffer2 { + /** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */ __u64 buffers_ptr; + + /** @buffer_count: Number of elements in @buffers_ptr array */ __u32 buffer_count; - /** Offset in the batchbuffer to start execution from. */ + /** + * @batch_start_offset: Offset in the batchbuffer to start execution + * from. + */ __u32 batch_start_offset; - /** Bytes used in batchbuffer from batch_start_offset */ + + /** + * @batch_len: Length in bytes of the batch buffer, starting from the + * @batch_start_offset. If 0, length is assumed to be the batch buffer + * object size. + */ __u32 batch_len; + + /** @DR1: deprecated */ __u32 DR1; + + /** @DR4: deprecated */ __u32 DR4; + + /** @num_cliprects: See @cliprects_ptr */ __u32 num_cliprects; + /** - * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a - * struct drm_i915_gem_exec_fence *fences. + * @cliprects_ptr: Kernel clipping was a DRI1 misfeature. + * + * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or + * I915_EXEC_USE_EXTENSIONS flags are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the + * array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single &i915_user_extension and num_cliprects is 0. */ __u64 cliprects_ptr; + + /** @flags: Execbuf flags */ + __u64 flags; #define I915_EXEC_RING_MASK (0x3f) #define I915_EXEC_DEFAULT (0<<0) #define I915_EXEC_RENDER (1<<0) @@ -1083,10 +1439,6 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ #define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ - __u64 flags; - __u64 rsvd1; /* now used for context info */ - __u64 rsvd2; -}; /** Resets the SO write offset registers for transform feedback on gen7. */ #define I915_EXEC_GEN7_SOL_RESET (1<<8) @@ -1181,7 +1533,30 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. + */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) + + /** @rsvd1: Context id */ + __u64 rsvd1; + + /** + * @rsvd2: in and out sync_file file descriptors. + * + * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the + * lower 32 bits of this field will have the in sync_file fd (input). + * + * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this + * field will have the out sync_file fd (output). + */ + __u64 rsvd2; +}; #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ @@ -1231,7 +1606,7 @@ struct drm_i915_gem_busy { * is accurate. * * The returned dword is split into two fields to indicate both - * the engine classess on which the object is being read, and the + * the engine classes on which the object is being read, and the * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written @@ -1245,12 +1620,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1261,49 +1635,91 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE - * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. - */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED - * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. - */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. + * + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Cacheing level to apply or return value + * @caching: The GTT caching level to apply or possible return value. * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * The supported @caching values: + * + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 +/* + * Do not add new tiling types here. The I915_TILING_* values are for + * de-tiling fence registers that no longer exist on modern platforms. Although + * the hardware may support new types of tiling in general (e.g., Tile4), we + * do not need to add them to the uapi that is specific to now-defunct ioctls. + */ #define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 @@ -1399,7 +1815,7 @@ struct drm_i915_gem_madvise { __u32 handle; /* Advice: either the buffer will be needed again in the near future, - * or wont be and could be discarded under memory pressure. + * or won't be and could be discarded under memory pressure. */ __u32 madv; @@ -1521,21 +1937,64 @@ struct drm_i915_gem_context_create { __u32 pad; }; +/** + * struct drm_i915_gem_context_create_ext - Structure for creating contexts. + */ struct drm_i915_gem_context_create_ext { - __u32 ctx_id; /* output: id of new context*/ + /** @ctx_id: Id of the created context (output) */ + __u32 ctx_id; + + /** + * @flags: Supported flags are: + * + * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS: + * + * Extensions may be appended to this structure and driver must check + * for those. See @extensions. + * + * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE + * + * Created context will have single timeline. + */ __u32 flags; #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) + + /** + * @extensions: Zero-terminated chain of extensions. + * + * I915_CONTEXT_CREATE_EXT_SETPARAM: + * Context parameter to set or query during context creation. + * See struct drm_i915_gem_context_create_ext_setparam. + * + * I915_CONTEXT_CREATE_EXT_CLONE: + * This extension has been removed. On the off chance someone somewhere + * has attempted to use it, never re-use this extension number. + */ __u64 extensions; +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 +#define I915_CONTEXT_CREATE_EXT_CLONE 1 }; +/** + * struct drm_i915_gem_context_param - Context parameter to set or query. + */ struct drm_i915_gem_context_param { + /** @ctx_id: Context id */ __u32 ctx_id; + + /** @size: Size of the parameter @value */ __u32 size; + + /** @param: Parameter to set or query */ __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 @@ -1602,6 +2061,7 @@ struct drm_i915_gem_context_param { * Extensions: * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) + * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) */ #define I915_CONTEXT_PARAM_ENGINES 0xa @@ -1620,32 +2080,81 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc + +/* + * I915_CONTEXT_PARAM_PROTECTED_CONTENT: + * + * Mark that the context makes use of protected content, which will result + * in the context being invalidated when the protected content session is. + * Given that the protected content session is killed on suspend, the device + * is kept awake for the lifetime of a protected context, so the user should + * make sure to dispose of them once done. + * This flag can only be set at context creation time and, when set to true, + * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE + * to false. This flag can't be set to true in conjunction with setting the + * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: + * + * .. code-block:: C + * + * struct drm_i915_gem_context_create_ext_setparam p_protected = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, + * .value = 1, + * } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_norecover = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * .next_extension = to_user_pointer(&p_protected), + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_RECOVERABLE, + * .value = 0, + * } + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_norecover); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * In addition to the normal failure cases, setting this flag during context + * creation can result in the following errors: + * + * -ENODEV: feature not available + * -EPERM: trying to mark a recoverable or not bannable context as protected + * -ENXIO: A dependency such as a component driver or firmware is not yet + * loaded so user space may need to attempt again. Depending on the + * device, this error may be reported if protected context creation is + * attempted very early after kernel start because the internal timeout + * waiting for such dependencies is not guaranteed to be larger than + * required (numbers differ depending on system and kernel config): + * - ADL/RPL: dependencies may take up to 3 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * - MTL: dependencies may take up to 8 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * NOTE: such dependencies happen once, so a subsequent call to create a + * protected context after a prior successful call will not experience + * such timeouts and will not return -ENXIO (unless the driver is reloaded, + * or, depending on the device, resumes from a suspended state). + * -EIO: The firmware did not succeed in creating the protected context. + */ +#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ + /** @value: Context parameter value to be set or queried */ __u64 value; }; -/** +/* * Context SSEU programming * * It may be necessary for either functional or performance reason to configure @@ -1704,6 +2213,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1729,7 +2301,7 @@ struct i915_context_engines_load_balance { __u64 mbz64; /* reserved for future use; must be zero */ - struct i915_engine_class_instance engines[0]; + struct i915_engine_class_instance engines[]; } __attribute__((packed)); #define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \ @@ -1767,7 +2339,7 @@ struct i915_context_engines_bond { __u64 flags; /* all undefined flags must be zero */ __u64 mbz64[4]; /* reserved for future use; must be zero */ - struct i915_engine_class_instance engines[0]; + struct i915_engine_class_instance engines[]; } __attribute__((packed)); #define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \ @@ -1780,11 +2352,196 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * struct i915_context_engines_parallel_submit - Configure engine for + * parallel submission. + * + * Setup a slot in the context engine map to allow multiple BBs to be submitted + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU + * in parallel. Multiple hardware contexts are created internally in the i915 to + * run these BBs. Once a slot is configured for N BBs only N BBs can be + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how + * many BBs there are based on the slot's configuration. The N BBs are the last + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. + * + * The default placement behavior is to create implicit bonds between each + * context if each context maps to more than 1 physical engine (e.g. context is + * a virtual engine). Also we only allow contexts of same engine class and these + * contexts must be in logically contiguous order. Examples of the placement + * behavior are described below. Lastly, the default is to not allow BBs to be + * preempted mid-batch. Rather insert coordinated preemption points on all + * hardware contexts between each set of BBs. Flags could be added in the future + * to change both of these default behaviors. + * + * Returns -EINVAL if hardware context placement configuration is invalid or if + * the placement configuration isn't supported on the platform / submission + * interface. + * Returns -ENODEV if extension isn't supported on the platform / submission + * interface. + * + * .. code-block:: none + * + * Examples syntax: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * + * Example 1 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=1, + * engines=CS[0],CS[1]) + * + * Results in the following valid placement: + * CS[0], CS[1] + * + * Example 2 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[2],CS[1],CS[3]) + * + * Results in the following valid placements: + * CS[0], CS[1] + * CS[2], CS[3] + * + * This can be thought of as two virtual engines, each containing two + * engines thereby making a 2D array. However, there are bonds tying the + * entries together and placing restrictions on how they can be scheduled. + * Specifically, the scheduler can choose only vertical columns from the 2D + * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the + * scheduler wants to submit to CS[0], it must also choose CS[1] and vice + * versa. Same for CS[2] requires also using CS[3]. + * VE[0] = CS[0], CS[2] + * VE[1] = CS[1], CS[3] + * + * Example 3 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[1],CS[1],CS[3]) + * + * Results in the following valid and invalid placements: + * CS[0], CS[1] + * CS[1], CS[3] - Not logically contiguous, return -EINVAL + */ +struct i915_context_engines_parallel_submit { + /** + * @base: base user extension. + */ + struct i915_user_extension base; + + /** + * @engine_index: slot for parallel engine + */ + __u16 engine_index; + + /** + * @width: number of contexts per parallel engine or in other words the + * number of batches in each submission + */ + __u16 width; + + /** + * @num_siblings: number of siblings per context or in other words the + * number of possible placements for each submission + */ + __u16 num_siblings; + + /** + * @mbz16: reserved for future use; must be zero + */ + __u16 mbz16; + + /** + * @flags: all undefined flags must be zero, currently not defined flags + */ + __u64 flags; + + /** + * @mbz64: reserved for future use; must be zero + */ + __u64 mbz64[3]; + + /** + * @engines: 2-d array of engine instances to configure parallel engine + * + * length = width (i) * num_siblings (j) + * index = j + i * num_siblings + */ + struct i915_engine_class_instance engines[]; + +} __packed; + +#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 width; \ + __u16 num_siblings; \ + __u16 mbz16; \ + __u64 flags; \ + __u64 mbz64[3]; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ - struct i915_engine_class_instance engines[0]; +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ + struct i915_engine_class_instance engines[]; } __attribute__((packed)); #define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ @@ -1792,25 +2549,19 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * struct drm_i915_gem_context_create_ext_setparam - Context parameter + * to set or query during context creation. + */ struct drm_i915_gem_context_create_ext_setparam { -#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 + /** @base: Extension link. See struct i915_user_extension. */ struct i915_user_extension base; - struct drm_i915_gem_context_param param; -}; -struct drm_i915_gem_context_create_ext_clone { -#define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; + /** + * @param: Context parameter to set or query. + * See struct drm_i915_gem_context_param. + */ + struct drm_i915_gem_context_param param; }; struct drm_i915_gem_context_destroy { @@ -1818,7 +2569,9 @@ struct drm_i915_gem_context_destroy { __u32 pad; }; -/* +/** + * struct drm_i915_gem_vm_control - Structure to create or destroy VM. + * * DRM_I915_GEM_VM_CREATE - * * Create a new virtual memory address space (ppGTT) for use within a context @@ -1828,20 +2581,23 @@ struct drm_i915_gem_context_destroy { * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is * returned in the outparam @id. * - * No flags are defined, with all bits reserved and must be zero. - * * An extension chain maybe provided, starting with @extensions, and terminated * by the @next_extension being 0. Currently, no extensions are defined. * * DRM_I915_GEM_VM_DESTROY - * - * Destroys a previously created VM id, specified in @id. + * Destroys a previously created VM id, specified in @vm_id. * * No extensions or flags are allowed currently, and so must be zero. */ struct drm_i915_gem_vm_control { + /** @extensions: Zero-terminated chain of extensions. */ __u64 extensions; + + /** @flags: reserved for future usage, currently MBZ */ __u32 flags; + + /** @vm_id: Id of the VM created or to be destroyed */ __u32 vm_id; }; @@ -1883,14 +2639,69 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ @@ -1911,6 +2722,14 @@ enum drm_i915_oa_format { I915_OA_FORMAT_A12_B8_C8, I915_OA_FORMAT_A32u40_A4u32_B8_C8, + /* DG2 */ + I915_OAR_FORMAT_A32u40_A4u32_B8_C8, + I915_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* MTL OAM */ + I915_OAM_FORMAT_MPEC8u64_B8_C8, + I915_OAM_FORMAT_MPEC8u32_B8_C8, + I915_OA_FORMAT_MAX /* non-ABI */ }; @@ -1993,6 +2812,25 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_POLL_OA_PERIOD, + /** + * Multiple engines may be mapped to the same OA unit. The OA unit is + * identified by class:instance of any engine mapped to it. + * + * This parameter specifies the engine class and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_CLASS, + + /** + * This parameter specifies the engine instance and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; @@ -2012,7 +2850,7 @@ struct drm_i915_perf_open_param { __u64 properties_ptr; }; -/** +/* * Enable data capture for a stream that was either opened in a disabled state * via I915_PERF_FLAG_DISABLED or was later disabled via * I915_PERF_IOCTL_DISABLE. @@ -2026,7 +2864,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) -/** +/* * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. @@ -2035,7 +2873,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) -/** +/* * Change metrics_set captured by a stream. * * If the stream is bound to a specific context, the configuration change @@ -2048,7 +2886,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) -/** +/* * Common to all i915 perf records */ struct drm_i915_perf_record_header { @@ -2097,162 +2935,382 @@ enum drm_i915_perf_record_type { }; /** + * struct drm_i915_perf_oa_config + * * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { - /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + /** + * @uuid: + * + * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" + */ char uuid[36]; + /** + * @n_mux_regs: + * + * Number of mux regs in &mux_regs_ptr. + */ __u32 n_mux_regs; + + /** + * @n_boolean_regs: + * + * Number of boolean regs in &boolean_regs_ptr. + */ __u32 n_boolean_regs; + + /** + * @n_flex_regs: + * + * Number of flex regs in &flex_regs_ptr. + */ __u32 n_flex_regs; - /* - * These fields are pointers to tuples of u32 values (register address, - * value). For example the expected length of the buffer pointed by - * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + /** + * @mux_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_mux_regs). */ __u64 mux_regs_ptr; + + /** + * @boolean_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_boolean_regs). + */ __u64 boolean_regs_ptr; + + /** + * @flex_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_flex_regs). + */ __u64 flex_regs_ptr; }; +/** + * struct drm_i915_query_item - An individual query for the kernel to process. + * + * The behaviour is determined by the @query_id. Note that exactly what + * @data_ptr is also depends on the specific @query_id. + */ struct drm_i915_query_item { + /** + * @query_id: + * + * The id for this query. Currently accepted query IDs are: + * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info) + * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info) + * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config) + * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) + * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) + * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) + */ __u64 query_id; -#define DRM_I915_QUERY_TOPOLOGY_INFO 1 -#define DRM_I915_QUERY_ENGINE_INFO 2 -#define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_HWCONFIG_BLOB 5 +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 /* Must be kept compact -- no holes and well documented */ - /* + /** + * @length: + * * When set to zero by userspace, this is filled with the size of the - * data to be written at the data_ptr pointer. The kernel sets this + * data to be written at the @data_ptr pointer. The kernel sets this * value to a negative value to signal an error on a particular query * item. */ __s32 length; - /* - * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + /** + * @flags: + * + * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * + * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the + * following: + * + * - %DRM_I915_QUERY_PERF_CONFIG_LIST + * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID * - * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the - * following : - * - DRM_I915_QUERY_PERF_CONFIG_LIST - * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain + * a struct i915_engine_class_instance that references a render engine. */ __u32 flags; #define DRM_I915_QUERY_PERF_CONFIG_LIST 1 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 - /* - * Data will be written at the location pointed by data_ptr when the - * value of length matches the length of the data to be written by the + /** + * @data_ptr: + * + * Data will be written at the location pointed by @data_ptr when the + * value of @length matches the length of the data to be written by the * kernel. */ __u64 data_ptr; }; +/** + * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the + * kernel to fill out. + * + * Note that this is generally a two step process for each struct + * drm_i915_query_item in the array: + * + * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct + * drm_i915_query_item, with &drm_i915_query_item.length set to zero. The + * kernel will then fill in the size, in bytes, which tells userspace how + * memory it needs to allocate for the blob(say for an array of properties). + * + * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the + * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that + * the &drm_i915_query_item.length should still be the same as what the + * kernel previously set. At this point the kernel can fill in the blob. + * + * Note that for some query items it can make sense for userspace to just pass + * in a buffer/blob equal to or larger than the required size. In this case only + * a single ioctl call is needed. For some smaller query items this can work + * quite well. + * + */ struct drm_i915_query { + /** @num_items: The number of elements in the @items_ptr array */ __u32 num_items; - /* - * Unused for now. Must be cleared to zero. + /** + * @flags: Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * This points to an array of num_items drm_i915_query_item structures. + /** + * @items_ptr: + * + * Pointer to an array of struct drm_i915_query_item. The number of + * array elements is @num_items. */ __u64 items_ptr; }; -/* - * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : - * - * data: contains the 3 pieces of information : - * - * - the slice mask with one bit per slice telling whether a slice is - * available. The availability of slice X can be queried with the following - * formula : - * - * (data[X / 8] >> (X % 8)) & 1 - * - * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. Gen12 has dual-subslices, which are - * similar to two gen11 subslices. For gen12, this array represents dual- - * subslices. The availability of subslice Y in slice X can be queried - * with the following formula : - * - * (data[subslice_offset + - * X * subslice_stride + - * Y / 8] >> (Y % 8)) & 1 - * - * - the EU mask for each subslice in each slice with one bit per EU telling - * whether an EU is available. The availability of EU Z in subslice Y in - * slice X can be queried with the following formula : +/** + * struct drm_i915_query_topology_info * - * (data[eu_offset + - * (X * max_subslices + Y) * eu_stride + - * Z / 8] >> (Z % 8)) & 1 + * Describes slice/subslice/EU information queried by + * %DRM_I915_QUERY_TOPOLOGY_INFO */ struct drm_i915_query_topology_info { - /* + /** + * @flags: + * * Unused for now. Must be cleared to zero. */ __u16 flags; + /** + * @max_slices: + * + * The number of bits used to express the slice mask. + */ __u16 max_slices; + + /** + * @max_subslices: + * + * The number of bits used to express the subslice mask. + */ __u16 max_subslices; + + /** + * @max_eus_per_subslice: + * + * The number of bits in the EU mask that correspond to a single + * subslice's EUs. + */ __u16 max_eus_per_subslice; - /* + /** + * @subslice_offset: + * * Offset in data[] at which the subslice masks are stored. */ __u16 subslice_offset; - /* + /** + * @subslice_stride: + * * Stride at which each of the subslice masks for each slice are * stored. */ __u16 subslice_stride; - /* + /** + * @eu_offset: + * * Offset in data[] at which the EU masks are stored. */ __u16 eu_offset; - /* + /** + * @eu_stride: + * * Stride at which each of the EU masks for each subslice are stored. */ __u16 eu_stride; + /** + * @data: + * + * Contains 3 pieces of information : + * + * - The slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the + * following formula : + * + * .. code:: c + * + * (data[X / 8] >> (X % 8)) & 1 + * + * Starting with Xe_HP platforms, Intel hardware no longer has + * traditional slices so i915 will always report a single slice + * (hardcoded slicemask = 0x1) which contains all of the platform's + * subslices. I.e., the mask here does not reflect any of the newer + * hardware concepts such as "gslices" or "cslices" since userspace + * is capable of inferring those from the subslice mask. + * + * - The subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. Starting with Gen12 we use the + * term "subslice" to refer to what the hardware documentation + * describes as a "dual-subslices." The availability of subslice Y + * in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1 + * + * - The EU mask for each subslice in each slice, with one bit per EU + * telling whether an EU is available. The availability of EU Z in + * subslice Y in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8 + * ] >> (Z % 8)) & 1 + */ __u8 data[]; }; /** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straightaway enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info; + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + +/** * struct drm_i915_engine_info * - * Describes one engine and it's capabilities as known to the driver. + * Describes one engine and its capabilities as known to the driver. */ struct drm_i915_engine_info { - /** Engine class and instance. */ + /** @engine: Engine class and instance. */ struct i915_engine_class_instance engine; - /** Reserved field. */ + /** @rsvd0: Reserved field. */ __u32 rsvd0; - /** Engine flags. */ + /** @flags: Engine flags. */ __u64 flags; +#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) - /** Capabilities of this engine. */ + /** @capabilities: Capabilities of this engine. */ __u64 capabilities; #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) - /** Reserved fields. */ - __u64 rsvd1[4]; + /** @logical_instance: Logical instance of engine */ + __u16 logical_instance; + + /** @rsvd1: Reserved fields. */ + __u16 rsvd1[3]; + /** @rsvd2: Reserved fields. */ + __u64 rsvd2[3]; }; /** @@ -2262,66 +3320,527 @@ struct drm_i915_engine_info { * an array of struct drm_i915_engine_info structures. */ struct drm_i915_query_engine_info { - /** Number of struct drm_i915_engine_info structs following. */ + /** @num_engines: Number of struct drm_i915_engine_info structs following. */ __u32 num_engines; - /** MBZ */ + /** @rsvd: MBZ */ __u32 rsvd[3]; - /** Marker for drm_i915_engine_info structures. */ + /** @engines: Marker for drm_i915_engine_info structures. */ struct drm_i915_engine_info engines[]; }; -/* - * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG. +/** + * struct drm_i915_query_perf_config + * + * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and + * %DRM_I915_QUERY_GEOMETRY_SUBSLICES. */ struct drm_i915_query_perf_config { union { - /* - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets - * this fields to the number of configurations available. + /** + * @n_configs: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to + * the number of configurations available. */ __u64 n_configs; - /* - * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, - * i915 will use the value in this field as configuration - * identifier to decide what data to write into config_ptr. + /** + * @config: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. */ __u64 config; - /* - * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, - * i915 will use the value in this field as configuration - * identifier to decide what data to write into config_ptr. + /** + * @uuid: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. * * String formatted like "%08x-%04x-%04x-%04x-%012x" */ char uuid[36]; }; - /* + /** + * @flags: + * * Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will - * write an array of __u64 of configuration identifiers. + /** + * @data: + * + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST, + * i915 will write an array of __u64 of configuration identifiers. * - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will - * write a struct drm_i915_perf_oa_config. If the following fields of - * drm_i915_perf_oa_config are set not set to 0, i915 will write into - * the associated pointers the values of submitted when the + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA, + * i915 will write a struct drm_i915_perf_oa_config. If the following + * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will + * write into the associated pointers the values of submitted when the * configuration was created : * - * - n_mux_regs - * - n_boolean_regs - * - n_flex_regs + * - &drm_i915_perf_oa_config.n_mux_regs + * - &drm_i915_perf_oa_config.n_boolean_regs + * - &drm_i915_perf_oa_config.n_flex_regs */ __u8 data[]; }; +/** + * enum drm_i915_gem_memory_class - Supported memory classes + */ +enum drm_i915_gem_memory_class { + /** @I915_MEMORY_CLASS_SYSTEM: System memory */ + I915_MEMORY_CLASS_SYSTEM = 0, + /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */ + I915_MEMORY_CLASS_DEVICE, +}; + +/** + * struct drm_i915_gem_memory_class_instance - Identify particular memory region + */ +struct drm_i915_gem_memory_class_instance { + /** @memory_class: See enum drm_i915_gem_memory_class */ + __u16 memory_class; + + /** @memory_instance: Which instance */ + __u16 memory_instance; +}; + +/** + * struct drm_i915_memory_region_info - Describes one region as known to the + * driver. + * + * Note this is using both struct drm_i915_query_item and struct drm_i915_query. + * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS + * at &drm_i915_query_item.query_id. + */ +struct drm_i915_memory_region_info { + /** @region: The class:instance pair encoding */ + struct drm_i915_gem_memory_class_instance region; + + /** @rsvd0: MBZ */ + __u32 rsvd0; + + /** + * @probed_size: Memory probed by the driver + * + * Note that it should not be possible to ever encounter a zero value + * here, also note that no current region type will ever return -1 here. + * Although for future region types, this might be a possibility. The + * same applies to the other size fields. + */ + __u64 probed_size; + + /** + * @unallocated_size: Estimate of memory remaining + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting. + * Without this (or if this is an older kernel) the value here will + * always equal the @probed_size. Note this is only currently tracked + * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here + * will always equal the @probed_size). + */ + __u64 unallocated_size; + + union { + /** @rsvd1: MBZ */ + __u64 rsvd1[8]; + struct { + /** + * @probed_cpu_visible_size: Memory probed by the driver + * that is CPU accessible. + * + * This will be always be <= @probed_size, and the + * remainder (if there is any) will not be CPU + * accessible. + * + * On systems without small BAR, the @probed_size will + * always equal the @probed_cpu_visible_size, since all + * of it will be CPU accessible. + * + * Note this is only tracked for + * I915_MEMORY_CLASS_DEVICE regions (for other types the + * value here will always equal the @probed_size). + * + * Note that if the value returned here is zero, then + * this must be an old kernel which lacks the relevant + * small-bar uAPI support (including + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on + * such systems we should never actually end up with a + * small BAR configuration, assuming we are able to load + * the kernel module. Hence it should be safe to treat + * this the same as when @probed_cpu_visible_size == + * @probed_size. + */ + __u64 probed_cpu_visible_size; + + /** + * @unallocated_cpu_visible_size: Estimate of CPU + * visible memory remaining. + * + * Note this is only tracked for + * I915_MEMORY_CLASS_DEVICE regions (for other types the + * value here will always equal the + * @probed_cpu_visible_size). + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable + * accounting. Without this the value here will always + * equal the @probed_cpu_visible_size. Note this is only + * currently tracked for I915_MEMORY_CLASS_DEVICE + * regions (for other types the value here will also + * always equal the @probed_cpu_visible_size). + * + * If this is an older kernel the value here will be + * zero, see also @probed_cpu_visible_size. + */ + __u64 unallocated_cpu_visible_size; + }; + }; +}; + +/** + * struct drm_i915_query_memory_regions + * + * The region info query enumerates all regions known to the driver by filling + * in an array of struct drm_i915_memory_region_info structures. + * + * Example for getting the list of supported regions: + * + * .. code-block:: C + * + * struct drm_i915_query_memory_regions *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_MEMORY_REGIONS; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of regions. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with the all the region info. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each region in the array + * for (i = 0; i < info->num_regions; i++) { + * struct drm_i915_memory_region_info mr = info->regions[i]; + * u16 class = mr.region.class; + * u16 instance = mr.region.instance; + * + * .... + * } + * + * free(info); + */ +struct drm_i915_query_memory_regions { + /** @num_regions: Number of supported regions */ + __u32 num_regions; + + /** @rsvd: MBZ */ + __u32 rsvd[3]; + + /** @regions: Info about each supported region */ + struct drm_i915_memory_region_info regions[]; +}; + +/** + * DOC: GuC HWCONFIG blob uAPI + * + * The GuC produces a blob with information about the current device. + * i915 reads this blob from GuC and makes it available via this uAPI. + * + * The format and meaning of the blob content are documented in the + * Programmer's Reference Manual. + */ + +/** + * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added + * extension support using struct i915_user_extension. + * + * Note that new buffer flags should be added here, at least for the stuff that + * is immutable. Previously we would have two ioctls, one to create the object + * with gem_create, and another to apply various parameters, however this + * creates some ambiguity for the params which are considered immutable. Also in + * general we're phasing out the various SET/GET ioctls. + */ +struct drm_i915_gem_create_ext { + /** + * @size: Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + * + * On platforms like DG2/ATS the kernel will always use 64K or larger + * pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a + * minimum of 64K GTT alignment for such objects. + * + * NOTE: Previously the ABI here required a minimum GTT alignment of 2M + * on DG2/ATS, due to how the hardware implemented 64K GTT page support, + * where we had the following complications: + * + * 1) The entire PDE (which covers a 2MB virtual address range), must + * contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same + * PDE is forbidden by the hardware. + * + * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM + * objects. + * + * However on actual production HW this was completely changed to now + * allow setting a TLB hint at the PTE level (see PS64), which is a lot + * more flexible than the above. With this the 2M restriction was + * dropped where we now only require 64K. + */ + __u64 size; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** + * @flags: Optional flags. + * + * Supported values: + * + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that + * the object will need to be accessed via the CPU. + * + * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only + * strictly required on configurations where some subset of the device + * memory is directly visible/mappable through the CPU (which we also + * call small BAR), like on some DG2+ systems. Note that this is quite + * undesirable, but due to various factors like the client CPU, BIOS etc + * it's something we can expect to see in the wild. See + * &drm_i915_memory_region_info.probed_cpu_visible_size for how to + * determine if this system applies. + * + * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to + * ensure the kernel can always spill the allocation to system memory, + * if the object can't be allocated in the mappable part of + * I915_MEMORY_CLASS_DEVICE. + * + * Also note that since the kernel only supports flat-CCS on objects + * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore + * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with + * flat-CCS. + * + * Without this hint, the kernel will assume that non-mappable + * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the + * kernel can still migrate the object to the mappable part, as a last + * resort, if userspace ever CPU faults this object, but this might be + * expensive, and so ideally should be avoided. + * + * On older kernels which lack the relevant small-bar uAPI support (see + * also &drm_i915_memory_region_info.probed_cpu_visible_size), + * usage of the flag will result in an error, but it should NEVER be + * possible to end up with a small BAR configuration, assuming we can + * also successfully load the i915 kernel module. In such cases the + * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as + * such there are zero restrictions on where the object can be placed. + */ +#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0) + __u32 flags; + + /** + * @extensions: The chain of extensions to apply to this object. + * + * This will be useful in the future when we need to support several + * different extensions, and we need to apply more than one when + * creating the object. See struct i915_user_extension. + * + * If we don't supply any extensions then we get the same old gem_create + * behaviour. + * + * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see + * struct drm_i915_gem_create_ext_memory_regions. + * + * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see + * struct drm_i915_gem_create_ext_protected_content. + * + * For I915_GEM_CREATE_EXT_SET_PAT usage see + * struct drm_i915_gem_create_ext_set_pat. + */ +#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 +#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 +#define I915_GEM_CREATE_EXT_SET_PAT 2 + __u64 extensions; +}; + +/** + * struct drm_i915_gem_create_ext_memory_regions - The + * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension. + * + * Set the object with the desired set of placements/regions in priority + * order. Each entry must be unique and supported by the device. + * + * This is provided as an array of struct drm_i915_gem_memory_class_instance, or + * an equivalent layout of class:instance pair encodings. See struct + * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to + * query the supported regions for a device. + * + * As an example, on discrete devices, if we wish to set the placement as + * device local-memory we can do something like: + * + * .. code-block:: C + * + * struct drm_i915_gem_memory_class_instance region_lmem = { + * .memory_class = I915_MEMORY_CLASS_DEVICE, + * .memory_instance = 0, + * }; + * struct drm_i915_gem_create_ext_memory_regions regions = { + * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, + * .regions = (uintptr_t)®ion_lmem, + * .num_regions = 1, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = 16 * PAGE_SIZE, + * .extensions = (uintptr_t)®ions, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + * + * At which point we get the object handle in &drm_i915_gem_create_ext.handle, + * along with the final object size in &drm_i915_gem_create_ext.size, which + * should account for any rounding up, if required. + * + * Note that userspace has no means of knowing the current backing region + * for objects where @num_regions is larger than one. The kernel will only + * ensure that the priority order of the @regions array is honoured, either + * when initially placing the object, or when moving memory around due to + * memory pressure + * + * On Flat-CCS capable HW, compression is supported for the objects residing + * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other + * memory class in @regions and migrated (by i915, due to memory + * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to + * decompress the content. But i915 doesn't have the required information to + * decompress the userspace compressed objects. + * + * So i915 supports Flat-CCS, on the objects which can reside only on + * I915_MEMORY_CLASS_DEVICE regions. + */ +struct drm_i915_gem_create_ext_memory_regions { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** @pad: MBZ */ + __u32 pad; + /** @num_regions: Number of elements in the @regions array. */ + __u32 num_regions; + /** + * @regions: The regions/placements array. + * + * An array of struct drm_i915_gem_memory_class_instance. + */ + __u64 regions; +}; + +/** + * struct drm_i915_gem_create_ext_protected_content - The + * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. + * + * If this extension is provided, buffer contents are expected to be protected + * by PXP encryption and require decryption for scan out and processing. This + * is only possible on platforms that have PXP enabled, on all other scenarios + * using this extension will cause the ioctl to fail and return -ENODEV. The + * flags parameter is reserved for future expansion and must currently be set + * to zero. + * + * The buffer contents are considered invalid after a PXP session teardown. + * + * The encryption is guaranteed to be processed correctly only if the object + * is submitted with a context created using the + * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks + * at submission time on the validity of the objects involved. + * + * Below is an example on how to create a protected object: + * + * .. code-block:: C + * + * struct drm_i915_gem_create_ext_protected_content protected_ext = { + * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, + * .flags = 0, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = PAGE_SIZE, + * .extensions = (uintptr_t)&protected_ext, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + */ +struct drm_i915_gem_create_ext_protected_content { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + /** @flags: reserved for future usage, currently MBZ */ + __u32 flags; +}; + +/** + * struct drm_i915_gem_create_ext_set_pat - The + * I915_GEM_CREATE_EXT_SET_PAT extension. + * + * If this extension is provided, the specified caching policy (PAT index) is + * applied to the buffer object. + * + * Below is an example on how to create an object with specific caching policy: + * + * .. code-block:: C + * + * struct drm_i915_gem_create_ext_set_pat set_pat_ext = { + * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT }, + * .pat_index = 0, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = PAGE_SIZE, + * .extensions = (uintptr_t)&set_pat_ext, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + */ +struct drm_i915_gem_create_ext_set_pat { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + /** + * @pat_index: PAT index to be set + * PAT index is a bit field in Page Table Entry to control caching + * behaviors for GPU accesses. The definition of PAT index is + * platform dependent and can be found in hardware specifications, + */ + __u32 pat_index; + /** @rsvd: reserved for future use */ + __u32 rsvd; +}; + +/* ID of the protected content session managed by i915 when PXP is active */ +#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf + #if defined(__cplusplus) } #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..3c42b9f1bada 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -19,7 +19,9 @@ /* ld/ldx fields */ #define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_MEMSX 0x80 /* load with sign extension */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ /* alu/jmp fields */ #define BPF_MOV 0xb0 /* mov reg to reg */ @@ -40,9 +42,19 @@ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSLT 0xc0 /* SLT is signed, '<' */ #define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ + +enum bpf_cond_pseudo_jmp { + BPF_MAY_GOTO = 0, +}; + /* Register numbers */ enum { BPF_REG_0 = 0, @@ -70,24 +82,843 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ +}; + +enum bpf_cgroup_iter_order { + BPF_CGROUP_ITER_ORDER_UNSPEC = 0, + BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */ + BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ + BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ }; union bpf_iter_link_info { struct { __u32 map_fd; } map; + struct { + enum bpf_cgroup_iter_order order; + + /* At most one of cgroup_fd and cgroup_id can be non-zero. If + * both are zero, the walk starts from the default cgroup v2 + * root. For walking v1 hierarchy, one should always explicitly + * specify cgroup_fd. + */ + __u32 cgroup_fd; + __u64 cgroup_id; + } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; -/* BPF syscall commands, see bpf(2) man-page for details. */ +/* BPF syscall commands, see bpf(2) man-page for more details. */ +/** + * DOC: eBPF Syscall Preamble + * + * The operation to be performed by the **bpf**\ () system call is determined + * by the *cmd* argument. Each operation takes an accompanying argument, + * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see + * below). The size argument is the size of the union pointed to by *attr*. + */ +/** + * DOC: eBPF Syscall Commands + * + * BPF_MAP_CREATE + * Description + * Create a map and return a file descriptor that refers to the + * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) + * is automatically enabled for the new file descriptor. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_MAP_CREATE** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_LOOKUP_ELEM + * Description + * Look up an element with a given *key* in the map referred to + * by the file descriptor *map_fd*. + * + * The *flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_ELEM + * Description + * Create or update an element (key/value pair) in a specified map. + * + * The *flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create a new element or update an existing element. + * **BPF_NOEXIST** + * Create a new element only if it did not exist. + * **BPF_EXIST** + * Update an existing element. + * **BPF_F_LOCK** + * Update a spin_lock-ed map element. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, + * **E2BIG**, **EEXIST**, or **ENOENT**. + * + * **E2BIG** + * The number of elements in the map reached the + * *max_entries* limit specified at map creation time. + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_ELEM + * Description + * Look up and delete an element by key in a specified map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_GET_NEXT_KEY + * Description + * Look up an element by key in a specified map and return the key + * of the next element. Can be used to iterate over all elements + * in the map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * The following cases can be used to iterate over all elements of + * the map: + * + * * If *key* is not found, the operation returns zero and sets + * the *next_key* pointer to the key of the first element. + * * If *key* is found, the operation returns zero and sets the + * *next_key* pointer to the key of the next element. + * * If *key* is the last element, returns -1 and *errno* is set + * to **ENOENT**. + * + * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or + * **EINVAL** on error. + * + * BPF_PROG_LOAD + * Description + * Verify and load an eBPF program, returning a new file + * descriptor associated with the program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). + * + * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is + * automatically enabled for the new file descriptor. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_PIN + * Description + * Pin an eBPF program or map referred by the specified *bpf_fd* + * to the provided *pathname* on the filesystem. + * + * The *pathname* argument must not contain a dot ("."). + * + * On success, *pathname* retains a reference to the eBPF object, + * preventing deallocation of the object when the original + * *bpf_fd* is closed. This allow the eBPF object to live beyond + * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent + * process. + * + * Applying **unlink**\ (2) or similar calls to the *pathname* + * unpins the object from the filesystem, removing the reference. + * If no other file descriptors or filesystem nodes refer to the + * same object, it will be deallocated (see NOTES). + * + * The filesystem type for the parent directory of *pathname* must + * be **BPF_FS_MAGIC**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_OBJ_GET + * Description + * Open a file descriptor for the eBPF object pinned to the + * specified *pathname*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ATTACH + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook. + * + * The *attach_type* specifies the eBPF attachment point to + * attach the program to, and must be one of *bpf_attach_type* + * (see below). + * + * The *attach_bpf_fd* must be a valid file descriptor for a + * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap + * or sock_ops type corresponding to the specified *attach_type*. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_TYPE_SK_SKB**, + * **BPF_PROG_TYPE_SK_MSG** + * + * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_DETACH + * Description + * Detach the eBPF program associated with the *target_fd* at the + * hook specified by *attach_type*. The program must have been + * previously attached using **BPF_PROG_ATTACH**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_TEST_RUN + * Description + * Run the eBPF program associated with the *prog_fd* a *repeat* + * number of times against a provided program context *ctx_in* and + * data *data_in*, and return the modified program context + * *ctx_out*, *data_out* (for example, packet data), result of the + * execution *retval*, and *duration* of the test run. + * + * The sizes of the buffers provided as input and output + * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must + * be provided in the corresponding variables *ctx_size_in*, + * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any + * of these parameters are not provided (ie set to NULL), the + * corresponding size field must be zero. + * + * Some program types have particular requirements: + * + * **BPF_PROG_TYPE_SK_LOOKUP** + * *data_in* and *data_out* must be NULL. + * + * **BPF_PROG_TYPE_RAW_TRACEPOINT**, + * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** + * + * *ctx_out*, *data_in* and *data_out* must be NULL. + * *repeat* must be zero. + * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * **ENOSPC** + * Either *data_size_out* or *ctx_size_out* is too small. + * **ENOTSUPP** + * This command is not supported by the program type of + * the program referred to by *prog_fd*. + * + * BPF_PROG_GET_NEXT_ID + * Description + * Fetch the next eBPF program currently loaded into the kernel. + * + * Looks for the eBPF program with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF programs + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_GET_NEXT_ID + * Description + * Fetch the next eBPF map currently loaded into the kernel. + * + * Looks for the eBPF map with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF maps + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_PROG_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF program corresponding to + * *prog_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF map corresponding to + * *map_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_GET_INFO_BY_FD + * Description + * Obtain information about the eBPF object corresponding to + * *bpf_fd*. + * + * Populates up to *info_len* bytes of *info*, which will be in + * one of the following formats depending on the eBPF object type + * of *bpf_fd*: + * + * * **struct bpf_prog_info** + * * **struct bpf_map_info** + * * **struct bpf_btf_info** + * * **struct bpf_link_info** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_QUERY + * Description + * Obtain information about eBPF programs associated with the + * specified *attach_type* hook. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_QUERY** always fetches the number of programs + * attached and the *attach_flags* which were used to attach those + * programs. Additionally, if *prog_ids* is nonzero and the number + * of attached programs is less than *prog_cnt*, populates + * *prog_ids* with the eBPF program ids of the programs attached + * at *target_fd*. + * + * The following flags may alter the result: + * + * **BPF_F_QUERY_EFFECTIVE** + * Only return information regarding programs which are + * currently effective at the specified *target_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_RAW_TRACEPOINT_OPEN + * Description + * Attach an eBPF program to a tracepoint *name* to access kernel + * internal arguments of the tracepoint in their raw form. + * + * The *prog_fd* must be a valid file descriptor associated with + * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. + * + * No ABI guarantees are made about the content of tracepoint + * arguments exposed to the corresponding eBPF program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_LOAD + * Description + * Verify and load BPF Type Format (BTF) metadata into the kernel, + * returning a new file descriptor associated with the metadata. + * BTF is described in more detail at + * https://www.kernel.org/doc/html/latest/bpf/btf.html. + * + * The *btf* parameter must point to valid memory providing + * *btf_size* bytes of BTF binary metadata. + * + * The returned file descriptor can be passed to other **bpf**\ () + * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to + * associate the BTF with those objects. + * + * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional + * parameters to specify a *btf_log_buf*, *btf_log_size* and + * *btf_log_level* which allow the kernel to return freeform log + * output regarding the BTF verification process. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_GET_FD_BY_ID + * Description + * Open a file descriptor for the BPF Type Format (BTF) + * corresponding to *btf_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_TASK_FD_QUERY + * Description + * Obtain information about eBPF programs associated with the + * target process identified by *pid* and *fd*. + * + * If the *pid* and *fd* are associated with a tracepoint, kprobe + * or uprobe perf event, then the *prog_id* and *fd_type* will + * be populated with the eBPF program id and file descriptor type + * of type **bpf_task_fd_type**. If associated with a kprobe or + * uprobe, the *probe_offset* and *probe_addr* will also be + * populated. Optionally, if *buf* is provided, then up to + * *buf_len* bytes of *buf* will be populated with the name of + * the tracepoint, kprobe or uprobe. + * + * The resulting *prog_id* may be introspected in deeper detail + * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_LOOKUP_AND_DELETE_ELEM + * Description + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. + * The *key* and *key_len* parameters should be zeroed when + * issuing this operation for these map types. + * + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_FREEZE + * Description + * Freeze the permissions of the specified map. + * + * Write permissions may be frozen by passing zero *flags*. + * Upon success, no future syscall invocations may alter the + * map state of *map_fd*. Write operations from eBPF programs + * are still possible for a frozen map. + * + * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_BTF_GET_NEXT_ID + * Description + * Fetch the next BPF Type Format (BTF) object currently loaded + * into the kernel. + * + * Looks for the BTF object with an id greater than *start_id* + * and updates *next_id* on success. If no other BTF objects + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_LOOKUP_BATCH + * Description + * Iterate and fetch multiple elements in a map. + * + * Two opaque values are used to manage batch operations, + * *in_batch* and *out_batch*. Initially, *in_batch* must be set + * to NULL to begin the batched operation. After each subsequent + * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant + * *out_batch* as the *in_batch* for the next operation to + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. + * + * The *keys* and *values* are output parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are copied into the + * user buffer, with the keys copied into *keys* and the values + * copied into the corresponding indices in *values*. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **ENOSPC** to indicate that *keys* or + * *values* is too small to dump an entire bucket during + * iteration of a hash-based map type. + * + * BPF_MAP_LOOKUP_AND_DELETE_BATCH + * Description + * Iterate and delete all elements in a map. + * + * This operation has the same behavior as + * **BPF_MAP_LOOKUP_BATCH** with two exceptions: + * + * * Every element that is successfully returned is also deleted + * from the map. This is at least *count* elements. Note that + * *count* is both an input and an output parameter. + * * Upon returning with *errno* set to **EFAULT**, up to + * *count* elements may be deleted without returning the keys + * and values of the deleted elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_BATCH + * Description + * Update multiple elements in a map by *key*. + * + * The *keys* and *values* are input parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * Each element specified in *keys* is sequentially updated to the + * value in the corresponding index in *values*. The *in_batch* + * and *out_batch* parameters are ignored and should be zeroed. + * + * The *elem_flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create new elements or update a existing elements. + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * **BPF_EXIST** + * Update existing elements. + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or + * **E2BIG**. **E2BIG** indicates that the number of elements in + * the map reached the *max_entries* limit specified at map + * creation time. + * + * May set *errno* to one of the following error codes under + * specific circumstances: + * + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_BATCH + * Description + * Delete multiple elements in a map by *key*. + * + * The *keys* parameter is an input parameter which must point + * to memory large enough to hold *count* items based on the key + * size of the map *map_fd*, that is, *key_size* * *count*. + * + * Each element specified in *keys* is sequentially deleted. The + * *in_batch*, *out_batch*, and *values* parameters are ignored + * and should be zeroed. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. If + * *errno* is **EFAULT**, up to *count* elements may be been + * deleted. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_CREATE + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook and return a file descriptor handle for + * managing the link. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_UPDATE + * Description + * Update the eBPF program in the specified *link_fd* to + * *new_prog_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF Link corresponding to + * *link_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_GET_NEXT_ID + * Description + * Fetch the next eBPF link currently loaded into the kernel. + * + * Looks for the eBPF link with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF links + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_ENABLE_STATS + * Description + * Enable eBPF runtime statistics gathering. + * + * Runtime statistics gathering for the eBPF runtime is disabled + * by default to minimize the corresponding performance overhead. + * This command enables statistics globally. + * + * Multiple programs may independently enable statistics. + * After gathering the desired statistics, eBPF runtime statistics + * may be disabled again by calling **close**\ (2) for the file + * descriptor returned by this function. Statistics will only be + * disabled system-wide when all outstanding file descriptors + * returned by prior calls for this subcommand are closed. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_ITER_CREATE + * Description + * Create an iterator on top of the specified *link_fd* (as + * previously created using **BPF_LINK_CREATE**) and return a + * file descriptor that can be used to trigger the iteration. + * + * If the resulting file descriptor is pinned to the filesystem + * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls + * for that path will trigger the iterator to read kernel state + * using the eBPF program attached to *link_fd*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_DETACH + * Description + * Forcefully detach the specified *link_fd* from its + * corresponding attachment point. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_BIND_MAP + * Description + * Bind a map to the lifetime of an eBPF program. + * + * The map identified by *map_fd* is bound to the program + * identified by *prog_fd* and only released when *prog_fd* is + * released. This may be used in cases where metadata should be + * associated with a program which otherwise does not contain any + * references to the map (for example, embedded in the eBPF + * program instructions). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * NOTES + * eBPF objects (maps and programs) can be shared between processes. + * + * * After **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. + * * File descriptors referring to eBPF objects can be transferred over + * **unix**\ (7) domain sockets. + * * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. + * * File descriptors referring to eBPF objects can be pinned to the + * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). + * + * An eBPF object is deallocated only after all file descriptors referring + * to the object have been closed and no references remain pinned to the + * filesystem or attached (for example, bound to a program or device). + */ enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, @@ -100,6 +931,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, @@ -125,6 +957,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -147,9 +981,23 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs + * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE + + * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * deprecated. + */ + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, @@ -157,6 +1005,12 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, + BPF_MAP_TYPE_ARENA, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -199,6 +1053,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -240,6 +1097,24 @@ enum bpf_attach_type { BPF_XDP_CPUMAP, BPF_SK_LOOKUP, BPF_XDP, + BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, + BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, + BPF_STRUCT_OPS, + BPF_NETFILTER, + BPF_TCX_INGRESS, + BPF_TCX_EGRESS, + BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, + BPF_NETKIT_PRIMARY, + BPF_NETKIT_PEER, __MAX_BPF_ATTACH_TYPE }; @@ -253,8 +1128,26 @@ enum bpf_link_type { BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, + BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, + BPF_LINK_TYPE_NETFILTER = 10, + BPF_LINK_TYPE_TCX = 11, + BPF_LINK_TYPE_UPROBE_MULTI = 12, + BPF_LINK_TYPE_NETKIT = 13, + __MAX_BPF_LINK_TYPE, +}; - MAX_BPF_LINK_TYPE, +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + +enum bpf_perf_event_type { + BPF_PERF_EVENT_UNSPEC = 0, + BPF_PERF_EVENT_UPROBE = 1, + BPF_PERF_EVENT_URETPROBE = 2, + BPF_PERF_EVENT_KPROBE = 3, + BPF_PERF_EVENT_KRETPROBE = 4, + BPF_PERF_EVENT_TRACEPOINT = 5, + BPF_PERF_EVENT_EVENT = 6, }; /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command @@ -303,7 +1196,12 @@ enum bpf_link_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +/* Generic attachment flags. */ #define BPF_F_REPLACE (1U << 2) +#define BPF_F_BEFORE (1U << 3) +#define BPF_F_AFTER (1U << 4) +#define BPF_F_ID (1U << 5) +#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -312,7 +1210,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and @@ -355,11 +1253,43 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + +/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded + * program becomes device-bound but can access XDP metadata. + */ +#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) + +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +enum { + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) +}; + +/* link_create.uprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. + */ +enum { + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) +}; + +/* link_create.netfilter.flags used in LINK_CREATE command for + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. + */ +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD - * insn[0].imm: map fd + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx * insn[1].imm: 0 * insn[0].off: 0 * insn[1].off: 0 @@ -367,15 +1297,19 @@ enum bpf_link_type { * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx * insn[1].imm: offset into value * insn[0].off: 0 * insn[1].off: 0 * ldimm64 rewrite: address of map[0]+offset * verifier type: PTR_TO_MAP_VALUE */ -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 @@ -386,11 +1320,28 @@ enum bpf_link_type { * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function */ #define BPF_PSEUDO_CALL 1 +/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL, + * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel + */ +#define BPF_PSEUDO_KFUNC_CALL 2 + +enum bpf_addr_space_cast { + BPF_ADDR_SPACE_CAST = 1, +}; /* flags for BPF_MAP_UPDATE_ELEM command */ enum { @@ -438,13 +1389,31 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregesitered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), + +/* Flag for value_type_btf_obj_fd, the fd is available */ + BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), + +/* BPF token FD is passed in a corresponding command's token_fd field */ + BPF_F_TOKEN_FD = (1U << 16), + +/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */ + BPF_F_SEGV_ON_FAULT = (1U << 17), + +/* Do not translate kernel bpf_arena pointers to user pointers */ + BPF_F_NO_USER_CONV = (1U << 18), }; /* Flags for BPF_PROG_QUERY. */ /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0. */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -452,6 +1421,8 @@ enum { /* If set, run the test on the cpu specified by bpf_attr.test.cpu */ #define BPF_F_TEST_RUN_ON_CPU (1U << 0) +/* If set, XDP frames will be transmitted after processing */ +#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { @@ -502,6 +1473,25 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + * + * BPF_MAP_TYPE_ARENA - contains the address where user space + * is going to mmap() the arena. It has to be page aligned. + */ + __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -556,24 +1546,54 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; + __u32 core_relo_cnt; /* number of bpf_core_relo */ + __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ - __u32 target_fd; /* container object to attach to */ - __u32 attach_bpf_fd; /* eBPF program to attach */ + union { + __u32 target_fd; /* target object to attach to or ... */ + __u32 target_ifindex; /* target ifindex */ + }; + __u32 attach_bpf_fd; __u32 attach_type; __u32 attach_flags; - __u32 replace_bpf_fd; /* previously attached eBPF - * program to replace if - * BPF_F_REPLACE is used - */ + __u32 replace_bpf_fd; + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -597,6 +1617,7 @@ union bpf_attr { __aligned_u64 ctx_out; __u32 flags; __u32 cpu; + __u32 batch_size; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ @@ -618,12 +1639,26 @@ union bpf_attr { } info; struct { /* anonymous struct used by BPF_PROG_QUERY command */ - __u32 target_fd; /* container object to query */ + union { + __u32 target_fd; /* target object to query or ... */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; __u32 query_flags; __u32 attach_flags; __aligned_u64 prog_ids; - __u32 prog_cnt; + union { + __u32 prog_cnt; + __u32 count; + }; + __u32 :32; + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; + __aligned_u64 link_ids; + __aligned_u64 link_attach_flags; + __u64 revision; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ @@ -637,6 +1672,16 @@ union bpf_attr { __u32 btf_size; __u32 btf_log_size; __u32 btf_log_level; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; }; struct { @@ -656,30 +1701,96 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ union { - __u32 target_fd; /* object to attach to */ - __u32 target_ifindex; /* target ifindex */ + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; + union { + __u32 target_fd; /* target object to attach to or ... */ + __u32 target_ifindex; /* target ifindex */ }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ union { - __u32 target_btf_id; /* btf_id of target to attach to */ + __u32 target_btf_id; /* btf_id of target to attach to */ struct { __aligned_u64 iter_info; /* extra bpf_iter_link_info */ __u32 iter_info_len; /* iter_info length */ }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + __aligned_u64 cookies; + } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } tcx; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 cnt; + __u32 flags; + __u32 pid; + } uprobe_multi; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } netkit; }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { @@ -701,6 +1812,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -708,7 +1824,7 @@ union bpf_attr { * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 @@ -773,17 +1889,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * @@ -843,7 +1959,7 @@ union bpf_attr { * u32 bpf_get_smp_processor_id(void) * Description * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the + * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. * Return @@ -950,7 +2066,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * @@ -976,9 +2092,13 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -986,6 +2106,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -1128,6 +2250,9 @@ union bpf_attr { * sending the packet. This flag was added for GRE * encapsulation, but might be used with other protocols * as well in the future. + * **BPF_F_NO_TUNNEL_KEY** + * Add a flag to tunnel metadata indicating that no tunnel + * key should be set in the resulting tunnel header. * * Here is a typical usage on the transmit path: * @@ -1460,6 +2585,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * @@ -1490,8 +2617,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) @@ -1523,7 +2650,8 @@ union bpf_attr { * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. + * *len*, then all bytes in the linear part of *skb* will be made + * readable and writable. * * This helper is only needed for reading and writing with direct * packet access. @@ -1573,6 +2701,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -1644,24 +2774,34 @@ union bpf_attr { * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * Return - * A 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. + * A 8-byte long unique number on success, or 0 if the socket + * field is missing inside *skb*. * * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. + * + * u64 bpf_get_socket_cookie(struct sock *sk) + * Description + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts + * *sk*, but gets socket from a BTF **struct sock**. This helper + * also works for sleepable programs. + * Return + * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socked associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -1687,8 +2827,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -1696,14 +2836,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -1722,10 +2867,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). * * The following flags are supported at this time: * @@ -1745,6 +2892,15 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: + * Indicate the new IP header version after decapsulating the outer + * IP header. Used when the inner and outer IP versions are different. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -1753,7 +2909,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1765,8 +2921,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcasted to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when do broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. @@ -1885,7 +3045,7 @@ union bpf_attr { * * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description - * For en eBPF program attached to a perf event, retrieve the + * For an eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in * the structure pointed by *buf* and of size *buf_size*. Enabled * and running times are also stored in the structure (see @@ -1906,16 +3066,14 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * @@ -2149,8 +3307,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject @@ -2163,8 +3331,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack=<new value> * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -2207,9 +3375,23 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). + * **BPF_FIB_LOOKUP_SKIP_NEIGH** + * Skip the neighbour table lookup. *params*->dmac + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -2219,6 +3401,9 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and output params->mtu_result contains the MTU. + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -2425,6 +3610,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. @@ -2442,7 +3630,7 @@ union bpf_attr { * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the **BPF_STX_XADD** instruction to alter + * For example, by using the **BPF_ATOMIC** instructions to alter * the shared data. * Return * A pointer to the local storage area. @@ -2450,7 +3638,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -2735,10 +3923,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -2921,10 +4110,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. + * contains the length of the TCP header with options (at least + * **sizeof**\ (**struct tcphdr**)). * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -2987,10 +4177,10 @@ union bpf_attr { * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: + * On success, returns the number of bytes that were written, + * including the terminal NUL. This makes this helper useful in + * tracing programs for reading strings, and more importantly to + * get its length at runtime. See the following snippet: * * :: * @@ -3018,7 +4208,7 @@ union bpf_attr { * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * Return - * On success, the strictly positive length of the string, + * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. * @@ -3171,9 +4361,6 @@ union bpf_attr { * **-EOPNOTSUPP** if the operation is not supported, for example * a call from outside of TC ingress. * - * **-ESOCKTNOSUPPORT** if the socket type is not supported - * (reuseport). - * * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) * Description * Helper is overloaded depending on BPF program type. This @@ -3241,7 +4428,7 @@ union bpf_attr { * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes. + * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -3310,12 +4497,20 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * An adaptive notification is a notification sent whenever the user-space + * process has caught up and consumed all available payloads. In case the user-space + * process is still processing a previous payload, then no notification is needed + * as it will process the newly added payload automatically. * Return * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description * Reserve *size* bytes of payload in a ring buffer *ringbuf*. + * *flags* must be 0. * Return * Valid pointer with *size* bytes of memory available; NULL, * otherwise. @@ -3327,6 +4522,10 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * Return * Nothing. Always succeeds. * @@ -3337,6 +4536,10 @@ union bpf_attr { * of new data availability is sent. * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. * Return * Nothing. Always succeeds. * @@ -3422,6 +4625,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -3433,6 +4638,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. @@ -3448,8 +4654,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack=<new value> * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description @@ -3542,7 +4748,7 @@ union bpf_attr { * * **-EEXIST** if the option already exists. * - * **-EFAULT** on failrue to parse the existing header options. + * **-EFAULT** on failure to parse the existing header options. * * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. @@ -3736,178 +4942,1074 @@ union bpf_attr { * going through the CPU's backlog queue. * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types at the ingress - * hook and for veth device types. The peer device must reside in a - * different network namespace. + * currently only supported for tc BPF program types at the + * ingress hook and for veth and netkit target device types. The + * peer device must reside in a different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be a task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's available). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. + * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description + * Check packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * + * The argument *len_diff* can be used for querying with a planned + * size change. This allows to check MTU prior to changing packet + * ctx. Providing a *len_diff* adjustment that is larger than the + * actual packet size (resulting in negative packet size) will in + * principle not exceed the MTU, which is why it is not considered + * a failure. Other BPF helpers are needed for performing the + * planned size change; therefore the responsibility for catching + * a negative packet size belongs in those helpers. + * + * Specifying *ifindex* zero means the MTU check is performed + * against the current net device. This is practical if this isn't + * used prior to redirect. + * + * On input *mtu_len* must be a valid pointer, else verifier will + * reject BPF program. If the value *mtu_len* is initialized to + * zero then the ctx packet size is use. When value *mtu_len* is + * provided as input this specify the L3 length that the MTU check + * is done against. Remember XDP and TC length operate at L2, but + * this value is L3 as this correlate to MTU and IP-header tot_len + * values which are L3 (similar behavior as bpf_fib_lookup). + * + * The Linux kernel route table can configure MTUs on a more + * specific per route level, which is not provided by this helper. + * For route level MTU checks use the **bpf_fib_lookup**\ () + * helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** for tc cls_act programs. + * + * The *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_MTU_CHK_SEGS** + * This flag will only works for *ctx* **struct sk_buff**. + * If packet context contains extra packet segment buffers + * (often knows as GSO skb), then MTU check is harder to + * check at this point, because in transmit path it is + * possible for the skb packet to get re-segmented + * (depending on net device features). This could still be + * a MTU violation, so this flag enables performing MTU + * check against segments, with a different violation + * return code to tell it apart. Check cannot use len_diff. + * + * On return *mtu_len* pointer contains the MTU value of the net + * device. Remember the net device configured MTU is the L3 size, + * which is returned here and XDP and TC length operate at L2. + * Helper take this into account for you, but remember when using + * MTU value in your BPF-code. + * + * Return + * * 0 on success, and populate MTU value in *mtu_len* pointer. + * + * * < 0 if any input argument is invalid (*mtu_len* not updated) + * + * MTU violations return positive values, but also populate MTU + * value in *mtu_len* pointer, as this can be needed for + * implementing PMTU handing: + * + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * The following are a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** return 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. + * + * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) + * Description + * Outputs a string into the **str** buffer of size **str_size** + * based on a format string stored in a read-only map pointed by + * **fmt**. + * + * Each format specifier in **fmt** corresponds to one u64 element + * in the **data** array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* + * array. The *data_len* is the size of *data* in bytes - must be + * a multiple of 8. + * + * Formats **%s** and **%p{i,I}{4,6}** require to read kernel + * memory. Reading kernel memory may fail due to either invalid + * address or valid address but requiring a major memory fault. If + * reading kernel memory fails, the string for **%s** will be an + * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. + * Not returning error to bpf program is consistent with what + * **bpf_trace_printk**\ () does for now. + * + * Return + * The strictly positive length of the formatted string, including + * the trailing zero character. If the return value is greater than + * **str_size**, **str** contains a truncated string, guaranteed to + * be zero-terminated except when **str_size** is 0. + * + * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * *flags* can be one of: + * + * **BPF_F_TIMER_ABS** + * Start the timer in absolute expire value instead of the + * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). + * + * When called for kprobe program attached as uprobe it returns + * probe address for both entry and return uprobe. + * + * Return + * Address of the traced function for kprobe. + * 0 for kprobes placed within the function (not at the entry). + * Address of the probe for uprobe and return uprobe. + * + * u64 bpf_get_attach_cookie(void *ctx) + * Description + * Get bpf_cookie value provided (optionally) during the program + * attachment. It might be different for each individual + * attachment, even if BPF program itself is the same. + * Expects BPF program context *ctx* as a first argument. + * + * Supported for the following program types: + * - kprobe/uprobe; + * - tracepoint; + * - perf_event. + * Return + * Value specified by user at BPF link creation/attachment time + * or 0, if it was not specified. + * + * long bpf_task_pt_regs(struct task_struct *task) + * Description + * Get the struct pt_regs associated with **task**. + * Return + * A pointer to struct pt_regs. + * + * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) + * Description + * Get branch trace from hardware engines like Intel LBR. The + * hardware engine is stopped shortly after the helper is + * called. Therefore, the user need to filter branch entries + * based on the actual use case. To capture branch trace + * before the trigger point of the BPF program, the helper + * should be called at the beginning of the BPF program. + * + * The data is stored as struct perf_branch_entry into output + * buffer *entries*. *size* is the size of *entries* in bytes. + * *flags* is reserved for now and must be zero. + * + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-ENOENT** if architecture does not support branch records. + * + * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 + * to format and can handle more format args as a result. + * + * Arguments are to be used as in **bpf_seq_printf**\ () helper. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. + * + * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * Find vma of *task* that contains *addr*, call *callback_fn* + * function with *task*, *vma*, and *callback_ctx*. + * The *callback_fn* should be a static function and + * the *callback_ctx* should be a pointer to the stack. + * The *flags* is used to control certain aspects of the helper. + * Currently, the *flags* must be 0. + * + * The expected callback signature is + * + * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); + * + * Return + * 0 on success. + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. + * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= argument register count of traced function. + * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. + * + * Return + * The number of argument registers of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the BPF program's return value that will be returned to the upper layers. + * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. + * Return + * The BPF program's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the BPF program's return value that will be returned to the upper layers. + * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. + * + * Note that there is the following corner case where the program exports an error + * via bpf_set_retval but signals success via 'return 1': + * + * bpf_set_retval(-EPERM); + * return 1; + * + * In this case, the BPF program's return value will use helper's -EPERM. This + * still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case. + * + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. + * + * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type) + * Description + * Change the __sk_buff->tstamp_type to *tstamp_type* + * and set *tstamp* to the __sk_buff->tstamp together. + * + * If there is no need to change the __sk_buff->tstamp_type, + * the tstamp value can be directly written to __sk_buff->tstamp + * instead. + * + * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that + * will be kept during bpf_redirect_*(). A non zero + * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO + * *tstamp_type*. + * + * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used + * with a zero *tstamp*. + * + * Only IPv4 and IPv6 skb->protocol are supported. + * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. + * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported protocol + * + * long bpf_ima_file_hash(struct file *file, void *dst, u32 size) + * Description + * Returns a calculated IMA hash of the *file*. + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if + * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. + * + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * + * *flags* must be 0 except for skb-type dynptrs. + * + * For skb-type dynptrs: + * * All data slices of the dynptr are automatically + * invalidated after **bpf_dynptr_write**\ (). This is + * because writing may pull the skb and change the + * underlying packet buffer. + * + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, + * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). + * + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * + * skb and xdp type dynptrs may not use bpf_dynptr_data. They should + * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. + * + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. Other + * return values are not used now, and will be rejected by the + * verifier. + * Return + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. + * + * On failure, the returned value is one of the following: + * + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. + * + * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. + * + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ - FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ - FN(redirect_peer), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) \ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, ##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, ##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -3960,6 +6062,12 @@ enum { BPF_F_ZERO_CSUM_TX = (1ULL << 1), BPF_F_DONT_FRAGMENT = (1ULL << 2), BPF_F_SEQ_NUMBER = (1ULL << 3), + BPF_F_NO_TUNNEL_KEY = (1ULL << 4), +}; + +/* BPF_FUNC_skb_get_tunnel_key flags. */ +enum { + BPF_F_TUNINFO_FLAGS = (1ULL << 4), }; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and @@ -3993,6 +6101,9 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7), + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8), }; enum { @@ -4071,12 +6182,32 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_TSTAMP_UNSPEC, + BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ + /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, + * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC + * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + */ +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -4117,6 +6248,9 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; + __u8 tstamp_type; + __u32 :24; /* Padding, future use. */ + __u64 hwtstamp; }; struct bpf_tunnel_key { @@ -4127,8 +6261,15 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; - __u16 tunnel_ext; /* Padding, future use. */ + union { + __u16 tunnel_ext; /* compat */ + __be16 tunnel_flags; + }; __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -4167,6 +6308,11 @@ enum bpf_ret_code { * represented by BPF_REDIRECT above). */ BPF_LWT_REROUTE = 128, + /* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR + * to indicate that no custom dissection was performed, and + * fallback to standard dissector is requested. + */ + BPF_FLOW_DISSECTOR_CONTINUE = 129, }; struct bpf_sock { @@ -4180,7 +6326,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -4249,6 +6396,19 @@ struct bpf_sock_tuple { }; }; +/* (Simplified) user return codes for tcx prog type. + * A valid tcx program must return one of these defined values. All other + * return codes are reserved for future use. Must remain compatible with + * their TC_ACT_* counter-parts. For compatibility in behavior, unknown + * return codes are mapped to TCX_NEXT. + */ +enum tcx_action_base { + TCX_NEXT = -1, + TCX_PASS = 0, + TCX_DROP = 2, + TCX_REDIRECT = 7, +}; + struct bpf_xdp_sock { __u32 queue_id; }; @@ -4355,6 +6515,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. + * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. + */ + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 @@ -4395,6 +6569,10 @@ struct bpf_prog_info { __aligned_u64 prog_tags; __u64 run_time_ns; __u64 run_cnt; + __u64 recursion_misses; + __u32 verified_insns; + __u32 attach_btf_obj_id; + __u32 attach_btf_id; } __attribute__((aligned(8))); struct bpf_map_info { @@ -4412,12 +6590,17 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 btf_vmlinux_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { @@ -4431,6 +6614,8 @@ struct bpf_link_info { } raw_tracepoint; struct { __u32 attach_type; + __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ + __u32 target_btf_id; /* BTF type id inside the object */ } tracing; struct { __u64 cgroup_id; @@ -4439,11 +6624,26 @@ struct bpf_link_info { struct { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ + + /* If the iter specific field is 32 bits, it can be put + * in the first or second union. Otherwise it should be + * put in the second union. + */ union { struct { __u32 map_id; } map; }; + union { + struct { + __u64 cgroup_id; + __u32 order; + } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; + }; } iter; struct { __u32 netns_ino; @@ -4452,6 +6652,72 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; + struct { + __aligned_u64 addrs; + __u32 count; /* in/out: kprobe_multi function count */ + __u32 flags; + __u64 missed; + __aligned_u64 cookies; + } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; + struct { + __u32 type; /* enum bpf_perf_event_type */ + __u32 :32; + union { + struct { + __aligned_u64 file_name; /* in/out */ + __u32 name_len; + __u32 offset; /* offset from file_name */ + __u64 cookie; + } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ + struct { + __aligned_u64 func_name; /* in/out */ + __u32 name_len; + __u32 offset; /* offset from func_name */ + __u64 addr; + __u64 missed; + __u64 cookie; + } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ + struct { + __aligned_u64 tp_name; /* in/out */ + __u32 name_len; + __u32 :32; + __u64 cookie; + } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ + struct { + __u64 config; + __u32 type; + __u32 :32; + __u64 cookie; + } event; /* BPF_PERF_EVENT_EVENT */ + }; + } perf_event; + struct { + __u32 ifindex; + __u32 attach_type; + } tcx; + struct { + __u32 ifindex; + __u32 attach_type; + } netkit; }; } __attribute__((aligned(8))); @@ -4562,6 +6828,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. */ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -4747,6 +7014,7 @@ enum { BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ BPF_TCP_NEW_SYN_RECV, + BPF_TCP_BOUND_INACTIVE, BPF_TCP_MAX_STATES /* Leave at the end! */ }; @@ -4847,6 +7115,9 @@ struct bpf_raw_tracepoint_args { enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -4859,6 +7130,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -4872,9 +7144,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ + /* output: MTU value */ + __u16 mtu_result; + }; /* input: L3 device index for lookup * output: device index from FIB lookup */ @@ -4889,6 +7165,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -4903,9 +7182,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; @@ -4920,6 +7209,17 @@ struct bpf_redir_neigh { }; }; +/* bpf_check_mtu flags*/ +enum bpf_check_mtu_flags { + BPF_MTU_CHK_SEGS = (1U << 0), +}; + +enum bpf_check_mtu_ret { + BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ + BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ @@ -4979,6 +7279,34 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_dynptr { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_list_head { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 __opaque[3]; +} __attribute__((aligned(8))); + +struct bpf_rb_root { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_rb_node { + __u64 __opaque[4]; +} __attribute__((aligned(8))); + +struct bpf_refcount { + __u32 __opaque[1]; +} __attribute__((aligned(4))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. @@ -5006,16 +7334,21 @@ struct bpf_pidns_info { /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; __u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* @@ -5048,4 +7381,98 @@ enum { BTF_F_ZERO = (1ULL << 3), }; +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. + */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ + BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */ +}; + +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + +/* + * Flags to control bpf_timer_start() behaviour. + * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is + * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. + */ +enum { + BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), +}; + +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h index 8f95303f9d80..eb1b9d21250c 100644 --- a/tools/include/uapi/linux/bpf_perf_event.h +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -13,6 +13,7 @@ struct bpf_perf_event_data { bpf_user_pt_regs_t regs; __u64 sample_period; + __u64 addr; }; #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 5a667107ad2c..ec1798b6d3ff 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -33,17 +33,17 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and VAR. + * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. * "type" is a type_id referring to another type. */ union { @@ -52,28 +52,35 @@ struct btf_type { }; }; -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) -#define BTF_KIND_UNKN 0 /* Unknown */ -#define BTF_KIND_INT 1 /* Integer */ -#define BTF_KIND_PTR 2 /* Pointer */ -#define BTF_KIND_ARRAY 3 /* Array */ -#define BTF_KIND_STRUCT 4 /* Struct */ -#define BTF_KIND_UNION 5 /* Union */ -#define BTF_KIND_ENUM 6 /* Enumeration */ -#define BTF_KIND_FWD 7 /* Forward */ -#define BTF_KIND_TYPEDEF 8 /* Typedef */ -#define BTF_KIND_VOLATILE 9 /* Volatile */ -#define BTF_KIND_CONST 10 /* Const */ -#define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_FUNC 12 /* Function */ -#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_VAR 14 /* Variable */ -#define BTF_KIND_DATASEC 15 /* Section */ -#define BTF_KIND_MAX BTF_KIND_DATASEC -#define NR_BTF_KINDS (BTF_KIND_MAX + 1) +enum { + BTF_KIND_UNKN = 0, /* Unknown */ + BTF_KIND_INT = 1, /* Integer */ + BTF_KIND_PTR = 2, /* Pointer */ + BTF_KIND_ARRAY = 3, /* Array */ + BTF_KIND_STRUCT = 4, /* Struct */ + BTF_KIND_UNION = 5, /* Union */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ + BTF_KIND_FWD = 7, /* Forward */ + BTF_KIND_TYPEDEF = 8, /* Typedef */ + BTF_KIND_VOLATILE = 9, /* Volatile */ + BTF_KIND_CONST = 10, /* Const */ + BTF_KIND_RESTRICT = 11, /* Restrict */ + BTF_KIND_FUNC = 12, /* Function */ + BTF_KIND_FUNC_PROTO = 13, /* Function Proto */ + BTF_KIND_VAR = 14, /* Variable */ + BTF_KIND_DATASEC = 15, /* Section */ + BTF_KIND_FLOAT = 16, /* Floating point */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ + + NR_BTF_KINDS, + BTF_KIND_MAX = NR_BTF_KINDS - 1, +}; /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -169,4 +176,25 @@ struct btf_var_secinfo { __u32 size; }; +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe + * additional information related to the tag applied location. + * If component_idx == -1, the tag is applied to a struct, union, + * variable or function. Otherwise, it is applied to a struct/union + * member or a func argument, and component_idx indicates which member + * or argument (0 ... vlen-1). + */ +struct btf_decl_tag { + __s32 component_idx; +}; + +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index 5ed721ad5b19..a429381e7ca5 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h index c86c3e942df9..47afae3895ec 100644 --- a/tools/include/uapi/linux/ethtool.h +++ b/tools/include/uapi/linux/ethtool.h @@ -48,4 +48,57 @@ struct ethtool_channels { __u32 combined_count; }; +#define ETHTOOL_FWVERS_LEN 32 +#define ETHTOOL_BUSINFO_LEN 32 +#define ETHTOOL_EROMVERS_LEN 32 + +/** + * struct ethtool_drvinfo - general driver and device information + * @cmd: Command number = %ETHTOOL_GDRVINFO + * @driver: Driver short name. This should normally match the name + * in its bus driver structure (e.g. pci_driver::name). Must + * not be an empty string. + * @version: Driver version string; may be an empty string + * @fw_version: Firmware version string; may be an empty string + * @erom_version: Expansion ROM version string; may be an empty string + * @bus_info: Device bus address. This should match the dev_name() + * string for the underlying bus device, if there is one. May be + * an empty string. + * @reserved2: Reserved for future use; see the note on reserved space. + * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and + * %ETHTOOL_SPFLAGS commands; also the number of strings in the + * %ETH_SS_PRIV_FLAGS set + * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS + * command; also the number of strings in the %ETH_SS_STATS set + * @testinfo_len: Number of results returned by the %ETHTOOL_TEST + * command; also the number of strings in the %ETH_SS_TEST set + * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM + * and %ETHTOOL_SEEPROM commands, in bytes + * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS + * command, in bytes + * + * Users can use the %ETHTOOL_GSSET_INFO command to get the number of + * strings in any string set (from Linux 2.6.34). + * + * Drivers should set at most @driver, @version, @fw_version and + * @bus_info in their get_drvinfo() implementation. The ethtool + * core fills in the other fields using other driver operations. + */ +struct ethtool_drvinfo { + __u32 cmd; + char driver[32]; + char version[32]; + char fw_version[ETHTOOL_FWVERS_LEN]; + char bus_info[ETHTOOL_BUSINFO_LEN]; + char erom_version[ETHTOOL_EROMVERS_LEN]; + char reserved2[12]; + __u32 n_priv_flags; + __u32 n_stats; + __u32 testinfo_len; + __u32 eedump_len; + __u32 regdump_len; +}; + +#define ETHTOOL_GDRVINFO 0x00000003 + #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 2f86b2ad6d7e..282e90aeb163 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -43,6 +43,7 @@ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ +#define F_SEAL_EXEC 0x0020 /* prevent chmod modifying exec bits */ /* (1U << 31) is reserved for signed error codes */ /* @@ -111,4 +112,12 @@ #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ +#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to + compare object identity and may not + be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index f44eb0a04afd..48ad69f7722e 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -90,7 +90,7 @@ struct file_dedupe_range { __u16 dest_count; /* in - total elements in info array */ __u16 reserved1; /* must be zero */ __u32 reserved2; /* must be zero */ - struct file_dedupe_range_info info[0]; + struct file_dedupe_range_info info[]; }; /* And dynamically-tunable limits and defaults: */ @@ -184,8 +184,9 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ @@ -304,4 +305,64 @@ typedef int __bitwise __kernel_rwf_t; #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ RWF_APPEND) +/* Pagemap ioctl */ +#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) + +/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ +#define PAGE_IS_WPALLOWED (1 << 0) +#define PAGE_IS_WRITTEN (1 << 1) +#define PAGE_IS_FILE (1 << 2) +#define PAGE_IS_PRESENT (1 << 3) +#define PAGE_IS_SWAPPED (1 << 4) +#define PAGE_IS_PFNZERO (1 << 5) +#define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) + +/* + * struct page_region - Page region with flags + * @start: Start of the region + * @end: End of the region (exclusive) + * @categories: PAGE_IS_* category bitmask for the region + */ +struct page_region { + __u64 start; + __u64 end; + __u64 categories; +}; + +/* Flags for PAGEMAP_SCAN ioctl */ +#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */ +#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */ + +/* + * struct pm_scan_arg - Pagemap ioctl argument + * @size: Size of the structure + * @flags: Flags for the IOCTL + * @start: Starting address of the region + * @end: Ending address of the region + * @walk_end Address where the scan stopped (written by kernel). + * walk_end == end (address tags cleared) informs that the scan completed on entire range. + * @vec: Address of page_region struct array for output + * @vec_len: Length of the page_region struct array + * @max_pages: Optional limit for number of returned pages (0 = disabled) + * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1 + * @category_mask: Skip pages for which any category doesn't match + * @category_anyof_mask: Skip pages for which no category matches + * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned + */ +struct pm_scan_arg { + __u64 size; + __u64 flags; + __u64 start; + __u64 end; + __u64 walk_end; + __u64 vec; + __u64 vec_len; + __u64 max_pages; + __u64 category_inverted; + __u64 category_mask; + __u64 category_anyof_mask; + __u64 return_mask; +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7875709ccfeb..7a8f4c290187 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -20,15 +20,17 @@ #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 -#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 #define FSCRYPT_MODE_AES_256_CTS 4 #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 +#define FSCRYPT_MODE_SM4_XTS 7 +#define FSCRYPT_MODE_SM4_CTS 8 #define FSCRYPT_MODE_ADIANTUM 9 -#define __FSCRYPT_MODE_MAX 9 +#define FSCRYPT_MODE_AES_256_HCTR2 10 +/* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. @@ -45,7 +47,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. @@ -70,7 +71,8 @@ struct fscrypt_policy_v2 { __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; - __u8 __reserved[4]; + __u8 log2_data_unit_size; + __u8 __reserved[3]; __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; }; @@ -156,9 +158,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +172,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 @@ -177,7 +180,7 @@ struct fscrypt_get_key_status_arg { #define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 #define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK #define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY -#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_POLICY_FLAGS_VALID 0x07 /* contains old flags only */ #define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS #define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ @@ -185,8 +188,6 @@ struct fscrypt_get_key_status_arg { #define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS #define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC #define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* removed */ -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* removed */ #define FS_ENCRYPTION_MODE_ADIANTUM FSCRYPT_MODE_ADIANTUM #define FS_KEY_DESC_PREFIX FSCRYPT_KEY_DESC_PREFIX #define FS_KEY_DESC_PREFIX_SIZE FSCRYPT_KEY_DESC_PREFIX_SIZE diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h index 965e4d8606d8..1575d3ca6f0d 100644 --- a/tools/include/uapi/linux/hw_breakpoint.h +++ b/tools/include/uapi/linux/hw_breakpoint.h @@ -22,14 +22,4 @@ enum { HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, }; -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; - #endif /* _UAPI_LINUX_HW_BREAKPOINT_H */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 781e482dc499..f0d71b2a3f1e 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,29 +36,204 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +245,24 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; + __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; +}; - __u64 rx_nohandler; /* dropped, no handler found */ +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. + */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; }; /* The struct should be in sync with struct ifmap */ @@ -170,12 +360,38 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, + IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ + + IFLA_DEVLINK_PORT, + + IFLA_GSO_IPV4_MAX_SIZE, + IFLA_GRO_IPV4_MAX_SIZE, + IFLA_DPLL_PIN, __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) @@ -230,6 +446,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; @@ -292,6 +509,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; @@ -345,6 +563,14 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, + IFLA_BRPORT_MAB, + IFLA_BRPORT_MCAST_N_GROUPS, + IFLA_BRPORT_MCAST_MAX_GROUPS, + IFLA_BRPORT_NEIGH_VLAN_SUPPRESS, + IFLA_BRPORT_BACKUP_NHID, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -409,6 +635,9 @@ enum { IFLA_MACVLAN_MACADDR, IFLA_MACVLAN_MACADDR_DATA, IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + IFLA_MACVLAN_BC_QUEUE_LEN_USED, + IFLA_MACVLAN_BC_CUTOFF, __IFLA_MACVLAN_MAX, }; @@ -430,6 +659,7 @@ enum macvlan_macaddr_mode { }; #define MACVLAN_FLAG_NOPROMISC 1 +#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ /* VRF section */ enum { @@ -476,6 +706,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; @@ -517,7 +748,79 @@ enum ipvlan_mode { #define IPVLAN_F_PRIVATE 0x01 #define IPVLAN_F_VEPA 0x02 +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 flags; + __u16 reserved2; + __u32 ifindex; +}; + +/* netkit section */ +enum netkit_action { + NETKIT_NEXT = -1, + NETKIT_PASS = 0, + NETKIT_DROP = 2, + NETKIT_REDIRECT = 7, +}; + +enum netkit_mode { + NETKIT_L2, + NETKIT_L3, +}; + +enum { + IFLA_NETKIT_UNSPEC, + IFLA_NETKIT_PEER_INFO, + IFLA_NETKIT_PRIMARY, + IFLA_NETKIT_POLICY, + IFLA_NETKIT_PEER_POLICY, + IFLA_NETKIT_MODE, + __IFLA_NETKIT_MAX, +}; +#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) + /* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + VXLAN_VNIFILTER_ENTRY_STATS, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, @@ -549,6 +852,8 @@ enum { IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ + IFLA_VXLAN_LOCALBYPASS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -582,6 +887,7 @@ enum { IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) @@ -594,6 +900,18 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, @@ -615,6 +933,8 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_CREATE_SOCKETS, + IFLA_GTP_RESTART_COUNT, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) @@ -651,6 +971,10 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, + IFLA_BOND_COUPLED_CONTROL, __IFLA_BOND_MAX, }; @@ -678,6 +1002,7 @@ enum { IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, __IFLA_BOND_SLAVE_MAX, }; @@ -895,7 +1220,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -905,6 +1237,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; @@ -937,6 +1272,17 @@ enum { #define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. + */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + /* These are embedded into IFLA_STATS_LINK_XSTATS: * [IFLA_STATS_LINK_XSTATS] * -> [LINK_XSTATS_TYPE_xxx] @@ -954,10 +1300,21 @@ enum { enum { IFLA_OFFLOAD_XSTATS_UNSPEC, IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ __IFLA_OFFLOAD_XSTATS_MAX }; #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) @@ -1029,6 +1386,8 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, @@ -1044,4 +1403,24 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + +/* DSA section */ + +enum { + IFLA_DSA_UNSPEC, + IFLA_DSA_MASTER, + __IFLA_DSA_MAX, +}; + +#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/if_tun.h b/tools/include/uapi/linux/if_tun.h index 454ae31b93c7..2ec07de1d73b 100644 --- a/tools/include/uapi/linux/if_tun.h +++ b/tools/include/uapi/linux/if_tun.h @@ -108,7 +108,7 @@ struct tun_pi { struct tun_filter { __u16 flags; /* TUN_FLT_ flags see above */ __u16 count; /* Number of addresses */ - __u8 addr[0][ETH_ALEN]; + __u8 addr[][ETH_ALEN]; }; #endif /* _UAPI__IF_TUN_H */ diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index a78a8096f4ce..638c606dfa74 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -25,9 +25,21 @@ * application. */ #define XDP_USE_NEED_WAKEUP (1 << 3) +/* By setting this option, userspace application indicates that it can + * handle multiple descriptors per packet thus enabling AF_XDP to split + * multi-buffer XDP frames into multiple Rx descriptors. Without this set + * such frames will be dropped. + */ +#define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; @@ -70,6 +82,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { @@ -99,6 +112,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of union xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of union + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -108,4 +156,14 @@ struct xdp_desc { /* UMEM descriptor is __u64 */ +/* Flag indicating that the packet continues with the buffer pointed out by the + * next frame in the ring. The end of the packet is signalled by setting this + * bit to zero. For single buffer packets, every descriptor has 'options' set + * to 0 and this maintains backward compatibility. + */ +#define XDP_PKT_CONTD (1 << 0) + +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) + #endif /* _LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 7d6687618d80..e682ab628dfa 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -20,6 +20,7 @@ #define _UAPI_LINUX_IN_H #include <linux/types.h> +#include <linux/stddef.h> #include <linux/libc-compat.h> #include <linux/socket.h> @@ -68,6 +69,8 @@ enum { #define IPPROTO_PIM IPPROTO_PIM IPPROTO_COMP = 108, /* Compression Header Protocol */ #define IPPROTO_COMP IPPROTO_COMP + IPPROTO_L2TP = 115, /* Layer 2 Tunnelling Protocol */ +#define IPPROTO_L2TP IPPROTO_L2TP IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */ #define IPPROTO_SCTP IPPROTO_SCTP IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ @@ -159,6 +162,8 @@ struct in_addr { #define MCAST_MSFILTER 48 #define IP_MULTICAST_ALL 49 #define IP_UNICAST_IF 50 +#define IP_LOCAL_PORT_RANGE 51 +#define IP_PROTOCOL 52 #define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 @@ -192,7 +197,10 @@ struct ip_msfilter { __be32 imsf_interface; __u32 imsf_fmode; __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + __be32 imsf_slist[1]; + __DECLARE_FLEX_ARRAY(__be32, imsf_slist_flex); + }; }; #define IP_MSFILTER_SIZE(numsrc) \ @@ -211,11 +219,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ @@ -289,6 +308,9 @@ struct sockaddr_in { /* Address indicating an error return. */ #define INADDR_NONE ((unsigned long int) 0xffffffff) +/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ +#define INADDR_DUMMY ((unsigned long int) 0xc0000008) + /* Network number for local host loopback. */ #define IN_LOOPBACKNET 127 diff --git a/tools/include/uapi/linux/io_uring.h b/tools/include/uapi/linux/io_uring.h new file mode 100644 index 000000000000..f1c16f817742 --- /dev/null +++ b/tools/include/uapi/linux/io_uring.h @@ -0,0 +1,757 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Header file for the io_uring interface. + * + * Copyright (C) 2019 Jens Axboe + * Copyright (C) 2019 Christoph Hellwig + */ +#ifndef LINUX_IO_URING_H +#define LINUX_IO_URING_H + +#include <linux/fs.h> +#include <linux/types.h> +/* + * this file is shared with liburing and that has to autodetect + * if linux/time_types.h is available or not, it can + * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H + * if linux/time_types.h is not available + */ +#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H +#include <linux/time_types.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * IO submission data structure (Submission Queue Entry) + */ +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* IOSQE_ flags */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + union { + __u64 off; /* offset into file */ + __u64 addr2; + struct { + __u32 cmd_op; + __u32 __pad1; + }; + }; + union { + __u64 addr; /* pointer to buffer or iovecs */ + __u64 splice_off_in; + struct { + __u32 level; + __u32 optname; + }; + }; + __u32 len; /* buffer size or number of iovecs */ + union { + __kernel_rwf_t rw_flags; + __u32 fsync_flags; + __u16 poll_events; /* compatibility */ + __u32 poll32_events; /* word-reversed for BE */ + __u32 sync_range_flags; + __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; + __u32 fadvise_advice; + __u32 splice_flags; + __u32 rename_flags; + __u32 unlink_flags; + __u32 hardlink_flags; + __u32 xattr_flags; + __u32 msg_ring_flags; + __u32 uring_cmd_flags; + __u32 waitid_flags; + __u32 futex_flags; + }; + __u64 user_data; /* data to be passed back at completion time */ + /* pack this to avoid bogus arm OABI complaints */ + union { + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } __attribute__((packed)); + /* personality to use, if used */ + __u16 personality; + union { + __s32 splice_fd_in; + __u32 file_index; + __u32 optlen; + struct { + __u16 addr_len; + __u16 __pad3[1]; + }; + }; + union { + struct { + __u64 addr3; + __u64 __pad2[1]; + }; + __u64 optval; + /* + * If the ring is initialized with IORING_SETUP_SQE128, then + * this field is used for 80 bytes of arbitrary command data + */ + __u8 cmd[0]; + }; +}; + +/* + * If sqe->file_index is set to this for opcodes that instantiate a new + * direct descriptor (like openat/openat2/accept), then io_uring will allocate + * an available direct descriptor instead of having the application pass one + * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE + * if the space is full. + */ +#define IORING_FILE_INDEX_ALLOC (~0U) + +enum { + IOSQE_FIXED_FILE_BIT, + IOSQE_IO_DRAIN_BIT, + IOSQE_IO_LINK_BIT, + IOSQE_IO_HARDLINK_BIT, + IOSQE_ASYNC_BIT, + IOSQE_BUFFER_SELECT_BIT, + IOSQE_CQE_SKIP_SUCCESS_BIT, +}; + +/* + * sqe->flags + */ +/* use fixed fileset */ +#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT) +/* issue after inflight IO */ +#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT) +/* links next sqe */ +#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT) +/* like LINK, but stronger */ +#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT) +/* always go async */ +#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) +/* select buffer from sqe->buf_group */ +#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) +/* don't post CQE if request succeeded */ +#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) + +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ +#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ +#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ +#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +/* + * Cooperative task running. When requests complete, they often require + * forcing the submitter to transition to the kernel to complete. If this + * flag is set, work will be done when the task transitions anyway, rather + * than force an inter-processor interrupt reschedule. This avoids interrupting + * a task running in userspace, and saves an IPI. + */ +#define IORING_SETUP_COOP_TASKRUN (1U << 8) +/* + * If COOP_TASKRUN is set, get notified if task work is available for + * running and a kernel transition would be needed to run it. This sets + * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + */ +#define IORING_SETUP_TASKRUN_FLAG (1U << 9) +#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ +#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ +/* + * Only one task is allowed to submit requests + */ +#define IORING_SETUP_SINGLE_ISSUER (1U << 12) + +/* + * Defer running task work to get events. + * Rather than running bits of task work whenever the task transitions + * try to do it just before it is needed. + */ +#define IORING_SETUP_DEFER_TASKRUN (1U << 13) + +/* + * Application provides the memory for the rings + */ +#define IORING_SETUP_NO_MMAP (1U << 14) + +/* + * Register the ring fd in itself for use with + * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather + * than an fd. + */ +#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15) + +/* + * Removes indirection through the SQ index array. + */ +#define IORING_SETUP_NO_SQARRAY (1U << 16) + +enum io_uring_op { + IORING_OP_NOP, + IORING_OP_READV, + IORING_OP_WRITEV, + IORING_OP_FSYNC, + IORING_OP_READ_FIXED, + IORING_OP_WRITE_FIXED, + IORING_OP_POLL_ADD, + IORING_OP_POLL_REMOVE, + IORING_OP_SYNC_FILE_RANGE, + IORING_OP_SENDMSG, + IORING_OP_RECVMSG, + IORING_OP_TIMEOUT, + IORING_OP_TIMEOUT_REMOVE, + IORING_OP_ACCEPT, + IORING_OP_ASYNC_CANCEL, + IORING_OP_LINK_TIMEOUT, + IORING_OP_CONNECT, + IORING_OP_FALLOCATE, + IORING_OP_OPENAT, + IORING_OP_CLOSE, + IORING_OP_FILES_UPDATE, + IORING_OP_STATX, + IORING_OP_READ, + IORING_OP_WRITE, + IORING_OP_FADVISE, + IORING_OP_MADVISE, + IORING_OP_SEND, + IORING_OP_RECV, + IORING_OP_OPENAT2, + IORING_OP_EPOLL_CTL, + IORING_OP_SPLICE, + IORING_OP_PROVIDE_BUFFERS, + IORING_OP_REMOVE_BUFFERS, + IORING_OP_TEE, + IORING_OP_SHUTDOWN, + IORING_OP_RENAMEAT, + IORING_OP_UNLINKAT, + IORING_OP_MKDIRAT, + IORING_OP_SYMLINKAT, + IORING_OP_LINKAT, + IORING_OP_MSG_RING, + IORING_OP_FSETXATTR, + IORING_OP_SETXATTR, + IORING_OP_FGETXATTR, + IORING_OP_GETXATTR, + IORING_OP_SOCKET, + IORING_OP_URING_CMD, + IORING_OP_SEND_ZC, + IORING_OP_SENDMSG_ZC, + IORING_OP_READ_MULTISHOT, + IORING_OP_WAITID, + IORING_OP_FUTEX_WAIT, + IORING_OP_FUTEX_WAKE, + IORING_OP_FUTEX_WAITV, + + /* this goes last, obviously */ + IORING_OP_LAST, +}; + +/* + * sqe->uring_cmd_flags top 8bits aren't available for userspace + * IORING_URING_CMD_FIXED use registered buffer; pass this flag + * along with setting sqe->buf_index. + */ +#define IORING_URING_CMD_FIXED (1U << 0) +#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED + + +/* + * sqe->fsync_flags + */ +#define IORING_FSYNC_DATASYNC (1U << 0) + +/* + * sqe->timeout_flags + */ +#define IORING_TIMEOUT_ABS (1U << 0) +#define IORING_TIMEOUT_UPDATE (1U << 1) +#define IORING_TIMEOUT_BOOTTIME (1U << 2) +#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_LINK_TIMEOUT_UPDATE (1U << 4) +#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) +#define IORING_TIMEOUT_MULTISHOT (1U << 6) +#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) +#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) +/* + * sqe->splice_flags + * extends splice(2) flags + */ +#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ + +/* + * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the + * command flags for POLL_ADD are stored in sqe->len. + * + * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if + * the poll handler will continue to report + * CQEs on behalf of the same SQE. + * + * IORING_POLL_UPDATE Update existing poll request, matching + * sqe->addr as the old user_data field. + * + * IORING_POLL_LEVEL Level triggered poll. + */ +#define IORING_POLL_ADD_MULTI (1U << 0) +#define IORING_POLL_UPDATE_EVENTS (1U << 1) +#define IORING_POLL_UPDATE_USER_DATA (1U << 2) +#define IORING_POLL_ADD_LEVEL (1U << 3) + +/* + * ASYNC_CANCEL flags. + * + * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key + * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the + * request 'user_data' + * IORING_ASYNC_CANCEL_ANY Match any request + * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor + * IORING_ASYNC_CANCEL_USERDATA Match on user_data, default for no other key + * IORING_ASYNC_CANCEL_OP Match request based on opcode + */ +#define IORING_ASYNC_CANCEL_ALL (1U << 0) +#define IORING_ASYNC_CANCEL_FD (1U << 1) +#define IORING_ASYNC_CANCEL_ANY (1U << 2) +#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3) +#define IORING_ASYNC_CANCEL_USERDATA (1U << 4) +#define IORING_ASYNC_CANCEL_OP (1U << 5) + +/* + * send/sendmsg and recv/recvmsg flags (sqe->ioprio) + * + * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send + * or receive and arm poll if that yields an + * -EAGAIN result, arm poll upfront and skip + * the initial transfer attempt. + * + * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if + * the handler will continue to report + * CQEs on behalf of the same SQE. + * + * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in + * the buf_index field. + * + * IORING_SEND_ZC_REPORT_USAGE + * If set, SEND[MSG]_ZC should report + * the zerocopy usage in cqe.res + * for the IORING_CQE_F_NOTIF cqe. + * 0 is reported if zerocopy was actually possible. + * IORING_NOTIF_USAGE_ZC_COPIED if data was copied + * (at least partially). + */ +#define IORING_RECVSEND_POLL_FIRST (1U << 0) +#define IORING_RECV_MULTISHOT (1U << 1) +#define IORING_RECVSEND_FIXED_BUF (1U << 2) +#define IORING_SEND_ZC_REPORT_USAGE (1U << 3) + +/* + * cqe.res for IORING_CQE_F_NOTIF if + * IORING_SEND_ZC_REPORT_USAGE was requested + * + * It should be treated as a flag, all other + * bits of cqe.res should be treated as reserved! + */ +#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) + +/* + * accept flags stored in sqe->ioprio + */ +#define IORING_ACCEPT_MULTISHOT (1U << 0) + +/* + * IORING_OP_MSG_RING command types, stored in sqe->addr + */ +enum { + IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ + IORING_MSG_SEND_FD, /* send a registered fd to another ring */ +}; + +/* + * IORING_OP_MSG_RING flags (sqe->msg_ring_flags) + * + * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not + * applicable for IORING_MSG_DATA, obviously. + */ +#define IORING_MSG_RING_CQE_SKIP (1U << 0) +/* Pass through the flags from sqe->file_index to cqe->flags */ +#define IORING_MSG_RING_FLAGS_PASS (1U << 1) + +/* + * IO completion data structure (Completion Queue Entry) + */ +struct io_uring_cqe { + __u64 user_data; /* sqe->data submission passed back */ + __s32 res; /* result code for this event */ + __u32 flags; + + /* + * If the ring is initialized with IORING_SETUP_CQE32, then this field + * contains 16-bytes of padding, doubling the size of the CQE. + */ + __u64 big_cqe[]; +}; + +/* + * cqe->flags + * + * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID + * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries + * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv + * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct + * them from sends. + */ +#define IORING_CQE_F_BUFFER (1U << 0) +#define IORING_CQE_F_MORE (1U << 1) +#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) +#define IORING_CQE_F_NOTIF (1U << 3) + +enum { + IORING_CQE_BUFFER_SHIFT = 16, +}; + +/* + * Magic offsets for the application to mmap the data it needs + */ +#define IORING_OFF_SQ_RING 0ULL +#define IORING_OFF_CQ_RING 0x8000000ULL +#define IORING_OFF_SQES 0x10000000ULL +#define IORING_OFF_PBUF_RING 0x80000000ULL +#define IORING_OFF_PBUF_SHIFT 16 +#define IORING_OFF_MMAP_MASK 0xf8000000ULL + +/* + * Filled with the offset for mmap(2) + */ +struct io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 user_addr; +}; + +/* + * sq_ring->flags + */ +#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ +#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */ + +struct io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u32 flags; + __u32 resv1; + __u64 user_addr; +}; + +/* + * cq_ring->flags + */ + +/* disable eventfd notifications */ +#define IORING_CQ_EVENTFD_DISABLED (1U << 0) + +/* + * io_uring_enter(2) flags + */ +#define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) +#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_REGISTERED_RING (1U << 4) + +/* + * Passed in for io_uring_setup(2). Copied back with updated info on success + */ +struct io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 features; + __u32 wq_fd; + __u32 resv[3]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +/* + * io_uring_params->features flags + */ +#define IORING_FEAT_SINGLE_MMAP (1U << 0) +#define IORING_FEAT_NODROP (1U << 1) +#define IORING_FEAT_SUBMIT_STABLE (1U << 2) +#define IORING_FEAT_RW_CUR_POS (1U << 3) +#define IORING_FEAT_CUR_PERSONALITY (1U << 4) +#define IORING_FEAT_FAST_POLL (1U << 5) +#define IORING_FEAT_POLL_32BITS (1U << 6) +#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) +#define IORING_FEAT_EXT_ARG (1U << 8) +#define IORING_FEAT_NATIVE_WORKERS (1U << 9) +#define IORING_FEAT_RSRC_TAGS (1U << 10) +#define IORING_FEAT_CQE_SKIP (1U << 11) +#define IORING_FEAT_LINKED_FILE (1U << 12) +#define IORING_FEAT_REG_REG_RING (1U << 13) + +/* + * io_uring_register(2) opcodes and arguments + */ +enum { + IORING_REGISTER_BUFFERS = 0, + IORING_UNREGISTER_BUFFERS = 1, + IORING_REGISTER_FILES = 2, + IORING_UNREGISTER_FILES = 3, + IORING_REGISTER_EVENTFD = 4, + IORING_UNREGISTER_EVENTFD = 5, + IORING_REGISTER_FILES_UPDATE = 6, + IORING_REGISTER_EVENTFD_ASYNC = 7, + IORING_REGISTER_PROBE = 8, + IORING_REGISTER_PERSONALITY = 9, + IORING_UNREGISTER_PERSONALITY = 10, + IORING_REGISTER_RESTRICTIONS = 11, + IORING_REGISTER_ENABLE_RINGS = 12, + + /* extended with tagging */ + IORING_REGISTER_FILES2 = 13, + IORING_REGISTER_FILES_UPDATE2 = 14, + IORING_REGISTER_BUFFERS2 = 15, + IORING_REGISTER_BUFFERS_UPDATE = 16, + + /* set/clear io-wq thread affinities */ + IORING_REGISTER_IOWQ_AFF = 17, + IORING_UNREGISTER_IOWQ_AFF = 18, + + /* set/get max number of io-wq workers */ + IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + + /* register/unregister io_uring fd with the ring */ + IORING_REGISTER_RING_FDS = 20, + IORING_UNREGISTER_RING_FDS = 21, + + /* register ring based provide buffer group */ + IORING_REGISTER_PBUF_RING = 22, + IORING_UNREGISTER_PBUF_RING = 23, + + /* sync cancelation API */ + IORING_REGISTER_SYNC_CANCEL = 24, + + /* register a range of fixed file slots for automatic slot allocation */ + IORING_REGISTER_FILE_ALLOC_RANGE = 25, + + /* this goes last */ + IORING_REGISTER_LAST, + + /* flag added to the opcode to use a registered ring fd */ + IORING_REGISTER_USE_REGISTERED_RING = 1U << 31 +}; + +/* io-wq worker categories */ +enum { + IO_WQ_BOUND, + IO_WQ_UNBOUND, +}; + +/* deprecated, see struct io_uring_rsrc_update */ +struct io_uring_files_update { + __u32 offset; + __u32 resv; + __aligned_u64 /* __s32 * */ fds; +}; + +/* + * Register a fully sparse file space, rather than pass in an array of all + * -1 file descriptors. + */ +#define IORING_RSRC_REGISTER_SPARSE (1U << 0) + +struct io_uring_rsrc_register { + __u32 nr; + __u32 flags; + __u64 resv2; + __aligned_u64 data; + __aligned_u64 tags; +}; + +struct io_uring_rsrc_update { + __u32 offset; + __u32 resv; + __aligned_u64 data; +}; + +struct io_uring_rsrc_update2 { + __u32 offset; + __u32 resv; + __aligned_u64 data; + __aligned_u64 tags; + __u32 nr; + __u32 resv2; +}; + +/* Skip updating fd indexes set to this value in the fd table */ +#define IORING_REGISTER_FILES_SKIP (-2) + +#define IO_URING_OP_SUPPORTED (1U << 0) + +struct io_uring_probe_op { + __u8 op; + __u8 resv; + __u16 flags; /* IO_URING_OP_* flags */ + __u32 resv2; +}; + +struct io_uring_probe { + __u8 last_op; /* last opcode supported */ + __u8 ops_len; /* length of ops[] array below */ + __u16 resv; + __u32 resv2[3]; + struct io_uring_probe_op ops[]; +}; + +struct io_uring_restriction { + __u16 opcode; + union { + __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ + __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ + __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ + }; + __u8 resv; + __u32 resv2[3]; +}; + +struct io_uring_buf { + __u64 addr; + __u32 len; + __u16 bid; + __u16 resv; +}; + +struct io_uring_buf_ring { + union { + /* + * To avoid spilling into more pages than we need to, the + * ring tail is overlaid with the io_uring_buf->resv field. + */ + struct { + __u64 resv1; + __u32 resv2; + __u16 resv3; + __u16 tail; + }; + __DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs); + }; +}; + +/* + * Flags for IORING_REGISTER_PBUF_RING. + * + * IOU_PBUF_RING_MMAP: If set, kernel will allocate the memory for the ring. + * The application must not set a ring_addr in struct + * io_uring_buf_reg, instead it must subsequently call + * mmap(2) with the offset set as: + * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) + * to get a virtual mapping for the ring. + */ +enum { + IOU_PBUF_RING_MMAP = 1, +}; + +/* argument for IORING_(UN)REGISTER_PBUF_RING */ +struct io_uring_buf_reg { + __u64 ring_addr; + __u32 ring_entries; + __u16 bgid; + __u16 flags; + __u64 resv[3]; +}; + +/* + * io_uring_restriction->opcode values + */ +enum { + /* Allow an io_uring_register(2) opcode */ + IORING_RESTRICTION_REGISTER_OP = 0, + + /* Allow an sqe opcode */ + IORING_RESTRICTION_SQE_OP = 1, + + /* Allow sqe flags */ + IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2, + + /* Require sqe flags (these flags must be set on each submission) */ + IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3, + + IORING_RESTRICTION_LAST +}; + +struct io_uring_getevents_arg { + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad; + __u64 ts; +}; + +/* + * Argument for IORING_REGISTER_SYNC_CANCEL + */ +struct io_uring_sync_cancel_reg { + __u64 addr; + __s32 fd; + __u32 flags; + struct __kernel_timespec timeout; + __u8 opcode; + __u8 pad[7]; + __u64 pad2[3]; +}; + +/* + * Argument for IORING_REGISTER_FILE_ALLOC_RANGE + * The range is specified as [off, off + len) + */ +struct io_uring_file_index_range { + __u32 off; + __u32 len; + __u64 resv; +}; + +struct io_uring_recvmsg_out { + __u32 namelen; + __u32 controllen; + __u32 payloadlen; + __u32 flags; +}; + +/* + * Argument for IORING_OP_URING_CMD when file is a socket + */ +enum { + SOCKET_URING_OP_SIOCINQ = 0, + SOCKET_URING_OP_SIOCOUTQ, + SOCKET_URING_OP_GETSOCKOPT, + SOCKET_URING_OP_SETSOCKOPT, +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7d8eced6f459..c3308536482b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -8,6 +8,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ +#include <linux/const.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/ioctl.h> @@ -15,100 +16,36 @@ #define KVM_API_VERSION 12 -/* *** Deprecated interfaces *** */ - -#define KVM_TRC_SHIFT 16 - -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) - -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - -struct kvm_user_trace_setup { - __u32 buf_size; - __u32 buf_nr; -}; - -#define __KVM_DEPRECATED_MAIN_W_0x06 \ - _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) -#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) - -#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) - -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) - -/* *** End of deprecated interfaces *** */ - - -/* for KVM_CREATE_MEMORY_REGION */ -struct kvm_memory_region { +/* for KVM_SET_USER_MEMORY_REGION */ +struct kvm_userspace_memory_region { __u32 slot; __u32 flags; __u64 guest_phys_addr; __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* start of the userspace allocated memory */ }; -/* for KVM_SET_USER_MEMORY_REGION */ -struct kvm_userspace_memory_region { +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { __u32 slot; __u32 flags; __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ - __u64 userspace_addr; /* start of the userspace allocated memory */ + __u64 memory_size; + __u64 userspace_addr; + __u64 guest_memfd_offset; + __u32 guest_memfd; + __u32 pad1; + __u64 pad2[14]; }; /* - * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, - * other bits are reserved for kvm internal use which are defined in - * include/linux/kvm_host.h. + * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for + * userspace, other bits are reserved for kvm internal use which are defined + * in include/linux/kvm_host.h. */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_GUEST_MEMFD (1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -216,6 +153,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -248,6 +199,17 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 +#define KVM_EXIT_RISCV_SBI 35 +#define KVM_EXIT_RISCV_CSR 36 +#define KVM_EXIT_NOTIFY 37 +#define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -259,6 +221,9 @@ struct kvm_hyperv_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* Flags that describe what fields in emulation_failure hold valid data. */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -317,13 +282,25 @@ struct kvm_run { __u32 len; __u8 is_write; } mmio; + /* KVM_EXIT_LOONGARCH_IOCSR */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } iocsr_io; /* KVM_EXIT_HYPERCALL */ struct { __u64 nr; __u64 args[6]; __u64 ret; - __u32 longmode; - __u32 pad; + + union { +#ifndef __KERNEL__ + __u32 longmode; +#endif + __u64 flags; + }; } hypercall; /* KVM_EXIT_TPR_ACCESS */ struct { @@ -362,6 +339,35 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; + /* Arbitrary debug data may follow. */ + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -390,8 +396,17 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_WAKEUP 4 +#define KVM_SYSTEM_EVENT_SUSPEND 5 +#define KVM_SYSTEM_EVENT_SEV_TERM 6 __u32 type; - __u64 flags; + __u32 ndata; + union { +#ifndef __KERNEL__ + __u64 flags; +#endif + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -413,6 +428,48 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) +#define KVM_MSR_EXIT_REASON_VALID_MASK (KVM_MSR_EXIT_REASON_INVAL | \ + KVM_MSR_EXIT_REASON_UNKNOWN | \ + KVM_MSR_EXIT_REASON_FILTER) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; + /* KVM_EXIT_RISCV_SBI */ + struct { + unsigned long extension_id; + unsigned long function_id; + unsigned long args[6]; + unsigned long ret[2]; + } riscv_sbi; + /* KVM_EXIT_RISCV_CSR */ + struct { + unsigned long csr_num; + unsigned long new_value; + unsigned long write_mask; + unsigned long ret_value; + } riscv_csr; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; + /* KVM_EXIT_MEMORY_FAULT */ + struct { +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; /* Fix the size of the union. */ char padding[256]; }; @@ -459,7 +516,7 @@ struct kvm_coalesced_mmio { struct kvm_coalesced_mmio_ring { __u32 first, last; - struct kvm_coalesced_mmio coalesced_mmio[0]; + struct kvm_coalesced_mmio coalesced_mmio[]; }; #define KVM_COALESCED_MMIO_MAX \ @@ -488,9 +545,14 @@ struct kvm_s390_mem_op { __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ union { - __u8 ar; /* the access register number */ + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ + }; __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* should be set to 0 */ + __u8 reserved[32]; /* ignored */ }; }; /* types for kvm_s390_mem_op->op */ @@ -498,9 +560,18 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_LOGICAL_WRITE 1 #define KVM_S390_MEMOP_SIDA_READ 2 #define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) /* for KVM_INTERRUPT */ struct kvm_interrupt { @@ -532,7 +603,7 @@ struct kvm_clear_dirty_log { /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; - __u8 sigset[0]; + __u8 sigset[]; }; /* for KVM_TPR_ACCESS_REPORTING */ @@ -559,6 +630,8 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 +#define KVM_MP_STATE_SUSPENDED 10 struct kvm_mp_state { __u32 mp_state; @@ -824,9 +897,6 @@ struct kvm_ppc_resize_hpt { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 -#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 -#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) @@ -1037,6 +1107,54 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 +#define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 +#define KVM_CAP_VM_GPA_BITS 207 +#define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 +#define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 +#define KVM_CAP_DISABLE_QUIRKS2 213 +#define KVM_CAP_VM_TSC_CONTROL 214 +#define KVM_CAP_SYSTEM_EVENT_DATA 215 +#define KVM_CAP_ARM_SYSTEM_SUSPEND 216 +#define KVM_CAP_S390_PROTECTED_DUMP 217 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 +#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 +#define KVM_CAP_S390_ZPCI_OP 221 +#define KVM_CAP_S390_CPU_TOPOLOGY 222 +#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 +#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 +#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 +#define KVM_CAP_COUNTER_OFFSET 227 +#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 +#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 +#define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 +#define KVM_CAP_GUEST_MEMFD 234 +#define KVM_CAP_VM_TYPES 235 #ifdef KVM_CAP_IRQ_ROUTING @@ -1068,11 +1186,20 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; +struct kvm_irq_routing_xen_evtchn { + __u32 port; + __u32 vcpu; + __u32 priority; +}; + +#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 struct kvm_irq_routing_entry { __u32 gsi; @@ -1084,6 +1211,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_hv_sint hv_sint; + struct kvm_irq_routing_xen_evtchn xen_evtchn; __u32 pad[8]; } u; }; @@ -1091,7 +1219,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing { __u32 nr; __u32 flags; - struct kvm_irq_routing_entry entries[0]; + struct kvm_irq_routing_entry entries[]; }; #endif @@ -1110,6 +1238,15 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1143,11 +1280,16 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 +#define KVM_CLOCK_REALTIME (1 << 2) +#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad[9]; + __u32 pad0; + __u64 realtime; + __u64 host_tsc; + __u32 pad[4]; }; /* For KVM_CAP_SW_TLB */ @@ -1184,6 +1326,7 @@ struct kvm_dirty_tlb { #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_RISCV 0x8000000000000000ULL +#define KVM_REG_LOONGARCH 0x9000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL @@ -1199,7 +1342,7 @@ struct kvm_dirty_tlb { struct kvm_reg_list { __u64 n; /* number of regs */ - __u64 reg[0]; + __u64 reg[]; }; struct kvm_one_reg { @@ -1240,9 +1383,16 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_VFIO_GROUP 1 -#define KVM_DEV_VFIO_GROUP_ADD 1 -#define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_FILE 1 + +#define KVM_DEV_VFIO_FILE_ADD 1 +#define KVM_DEV_VFIO_FILE_DEL 2 + +/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */ +#define KVM_DEV_VFIO_GROUP KVM_DEV_VFIO_FILE + +#define KVM_DEV_VFIO_GROUP_ADD KVM_DEV_VFIO_FILE_ADD +#define KVM_DEV_VFIO_GROUP_DEL KVM_DEV_VFIO_FILE_DEL #define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { @@ -1266,6 +1416,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_ARM_PV_TIME, #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME + KVM_DEV_TYPE_RISCV_AIA, +#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_MAX, }; @@ -1275,23 +1427,19 @@ struct kvm_vfio_spapr_tce { }; /* - * ioctls for VM fds - */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) -/* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -/* KVM_SET_MEMORY_ALIAS is obsolete: */ -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */ #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { @@ -1316,20 +1464,8 @@ struct kvm_s390_ucas_mapping { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) -/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ - struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ - struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) @@ -1342,12 +1478,10 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) -/* Available with KVM_CAP_TSC_CONTROL */ +/* Available with KVM_CAP_TSC_CONTROL for a vCPU, or with +* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) -/* Available with KVM_CAP_PCI_2_3 */ -#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ - struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ @@ -1377,6 +1511,10 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) +/* Available with KVM_CAP_COUNTER_OFFSET */ +#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) +#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) @@ -1396,8 +1534,6 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -1439,7 +1575,7 @@ struct kvm_s390_ucas_mapping { #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) /* - * vcpu version available with KVM_ENABLE_CAP + * vcpu version available with KVM_CAP_ENABLE_CAP * vm version available with KVM_CAP_ENABLE_CAP_VM */ #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) @@ -1495,7 +1631,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ @@ -1516,6 +1652,55 @@ struct kvm_s390_pv_unp { __u64 tweak; }; +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + +enum pv_cmd_info_id { + KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; +}; + enum pv_cmd_id { KVM_PV_ENABLE, KVM_PV_DISABLE, @@ -1524,6 +1709,10 @@ enum pv_cmd_id { KVM_PV_VERIFY, KVM_PV_PREP_RESET, KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, + KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, }; struct kvm_pv_cmd { @@ -1538,6 +1727,115 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + struct { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. + */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_HVM_EVTCHN_SEND _IOW(KVMIO, 0xd0, struct kvm_irq_routing_xen_evtchn) + +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1566,6 +1864,10 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1618,6 +1920,51 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1691,4 +2038,193 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + +#define KVM_PMU_CAP_DISABLE (1 << 0) + +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. The memory is allocated + * at the send of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointd by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. + */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc. + * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * structure kvm or kvm_vcpu. + * @bucket_size: A parameter value used for histogram stats. It is only used + * for linear histogram stats, specifying the size of the bucket; + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 bucket_size; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + +/* Available with KVM_CAP_S390_PROTECTED_DUMP */ +#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) + +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) + +/* Available with KVM_CAP_S390_ZPCI_OP */ +#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +struct kvm_s390_zpci_op { + /* in */ + __u32 fh; /* target device */ + __u8 op; /* operation to perform */ + __u8 pad[3]; + union { + /* for KVM_S390_ZPCIOP_REG_AEN */ + struct { + __u64 ibv; /* Guest addr of interrupt bit vector */ + __u64 sb; /* Guest addr of summary bit */ + __u32 flags; + __u32 noi; /* Number of interrupts */ + __u8 isc; /* Guest interrupt subclass */ + __u8 sbo; /* Offset of guest summary bit vector */ + __u16 pad; + } reg_aen; + __u64 reserved[8]; + } u; +}; + +/* types for kvm_s390_zpci_op->op */ +#define KVM_S390_ZPCIOP_REG_AEN 0 +#define KVM_S390_ZPCIOP_DEREG_AEN 1 + +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + +/* Available with KVM_CAP_MEMORY_ATTRIBUTES */ +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; +}; + +#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + +#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + +struct kvm_create_guest_memfd { + __u64 size; + __u64 flags; + __u64 reserved[6]; +}; + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h deleted file mode 100644 index 45fcbf99d72e..000000000000 --- a/tools/include/uapi/linux/lirc.h +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * lirc.h - linux infrared remote control header file - * last modified 2010/07/13 by Jarod Wilson - */ - -#ifndef _LINUX_LIRC_H -#define _LINUX_LIRC_H - -#include <linux/types.h> -#include <linux/ioctl.h> - -#define PULSE_BIT 0x01000000 -#define PULSE_MASK 0x00FFFFFF - -#define LIRC_MODE2_SPACE 0x00000000 -#define LIRC_MODE2_PULSE 0x01000000 -#define LIRC_MODE2_FREQUENCY 0x02000000 -#define LIRC_MODE2_TIMEOUT 0x03000000 - -#define LIRC_VALUE_MASK 0x00FFFFFF -#define LIRC_MODE2_MASK 0xFF000000 - -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) - -#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) -#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) - -#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE) -#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) -#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) -#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) - -/* used heavily by lirc userspace */ -#define lirc_t int - -/*** lirc compatible hardware features ***/ - -#define LIRC_MODE2SEND(x) (x) -#define LIRC_SEND2MODE(x) (x) -#define LIRC_MODE2REC(x) ((x) << 16) -#define LIRC_REC2MODE(x) ((x) >> 16) - -#define LIRC_MODE_RAW 0x00000001 -#define LIRC_MODE_PULSE 0x00000002 -#define LIRC_MODE_MODE2 0x00000004 -#define LIRC_MODE_SCANCODE 0x00000008 -#define LIRC_MODE_LIRCCODE 0x00000010 - - -#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) -#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) -#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) -#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_SEND_MASK 0x0000003f - -#define LIRC_CAN_SET_SEND_CARRIER 0x00000100 -#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200 -#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400 - -#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW) -#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE) -#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2) -#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE) -#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) - -#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) - -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 -#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 -#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 - -#define LIRC_CAN_MEASURE_CARRIER 0x02000000 -#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 - -#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) -#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) - -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - -/*** IOCTL commands for lirc driver ***/ - -#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) - -#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32) -#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32) -#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32) - -#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32) -#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32) - -/* code length in bits, currently only for LIRC_MODE_LIRCCODE */ -#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32) - -#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32) -#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32) -/* Note: these can reset the according pulse_width */ -#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32) -#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32) -#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32) -#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32) - -/* - * when a timeout != 0 is set the driver will send a - * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is - * never sent, timeout is disabled by default - */ -#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32) - -/* 1 enables, 0 disables timeout reports in MODE2 */ -#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32) - -/* - * if enabled from the next key press on the driver will send - * LIRC_MODE2_FREQUENCY packets - */ -#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32) - -/* - * to set a range use LIRC_SET_REC_CARRIER_RANGE with the - * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound - */ -#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32) - -#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) - -/* - * Return the recording timeout, which is either set by - * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols. - */ -#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32) - -/* - * struct lirc_scancode - decoded scancode with protocol for use with - * LIRC_MODE_SCANCODE - * - * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR - * was decoded. - * @flags: should be 0 for transmit. When receiving scancodes, - * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set - * depending on the protocol - * @rc_proto: see enum rc_proto - * @keycode: the translated keycode. Set to 0 for transmit. - * @scancode: the scancode received or to be sent - */ -struct lirc_scancode { - __u64 timestamp; - __u16 flags; - __u16 rc_proto; - __u32 keycode; - __u64 scancode; -}; - -/* Set if the toggle bit of rc-5 or rc-6 is enabled */ -#define LIRC_SCANCODE_FLAG_TOGGLE 1 -/* Set if this is a nec or sanyo repeat */ -#define LIRC_SCANCODE_FLAG_REPEAT 2 - -/** - * enum rc_proto - the Remote Controller protocol - * - * @RC_PROTO_UNKNOWN: Protocol not known - * @RC_PROTO_OTHER: Protocol known but proprietary - * @RC_PROTO_RC5: Philips RC5 protocol - * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol - * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 - * @RC_PROTO_JVC: JVC protocol - * @RC_PROTO_SONY12: Sony 12 bit protocol - * @RC_PROTO_SONY15: Sony 15 bit protocol - * @RC_PROTO_SONY20: Sony 20 bit protocol - * @RC_PROTO_NEC: NEC protocol - * @RC_PROTO_NECX: Extended NEC protocol - * @RC_PROTO_NEC32: NEC 32 bit protocol - * @RC_PROTO_SANYO: Sanyo protocol - * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard - * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse - * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol - * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol - * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol - * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol - * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol - * @RC_PROTO_SHARP: Sharp protocol - * @RC_PROTO_XMP: XMP protocol - * @RC_PROTO_CEC: CEC protocol - * @RC_PROTO_IMON: iMon Pad protocol - * @RC_PROTO_RCMM12: RC-MM protocol 12 bits - * @RC_PROTO_RCMM24: RC-MM protocol 24 bits - * @RC_PROTO_RCMM32: RC-MM protocol 32 bits - */ -enum rc_proto { - RC_PROTO_UNKNOWN = 0, - RC_PROTO_OTHER = 1, - RC_PROTO_RC5 = 2, - RC_PROTO_RC5X_20 = 3, - RC_PROTO_RC5_SZ = 4, - RC_PROTO_JVC = 5, - RC_PROTO_SONY12 = 6, - RC_PROTO_SONY15 = 7, - RC_PROTO_SONY20 = 8, - RC_PROTO_NEC = 9, - RC_PROTO_NECX = 10, - RC_PROTO_NEC32 = 11, - RC_PROTO_SANYO = 12, - RC_PROTO_MCIR2_KBD = 13, - RC_PROTO_MCIR2_MSE = 14, - RC_PROTO_RC6_0 = 15, - RC_PROTO_RC6_6A_20 = 16, - RC_PROTO_RC6_6A_24 = 17, - RC_PROTO_RC6_6A_32 = 18, - RC_PROTO_RC6_MCE = 19, - RC_PROTO_SHARP = 20, - RC_PROTO_XMP = 21, - RC_PROTO_CEC = 22, - RC_PROTO_IMON = 23, - RC_PROTO_RCMM12 = 24, - RC_PROTO_RCMM24 = 25, - RC_PROTO_RCMM32 = 26, -}; - -#endif diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 923cc162609c..a246e11988d5 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -4,6 +4,7 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> +#include <linux/types.h> #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 @@ -27,6 +28,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB @@ -40,4 +42,17 @@ #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB +struct cachestat_range { + __u64 off; + __u64 len; +}; + +struct cachestat { + __u64 nr_cache; + __u64 nr_dirty; + __u64 nr_writeback; + __u64 nr_evicted; + __u64 nr_recently_evicted; +}; + #endif /* _UAPI_LINUX_MMAN_H */ diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 96a0240f23fe..ad5478dbad00 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -1,6 +1,8 @@ #ifndef _UAPI_LINUX_MOUNT_H #define _UAPI_LINUX_MOUNT_H +#include <linux/types.h> + /* * These are the fs-independent mount-flags: up to 32 flags are supported * @@ -16,6 +18,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 @@ -70,7 +73,9 @@ #define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ #define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#define MOVE_MOUNT__MASK 0x00000077 +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 /* * fsopen() flags. @@ -95,8 +100,9 @@ enum fsconfig_command { FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ - FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ }; /* @@ -116,5 +122,90 @@ enum fsconfig_command { #define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 __spare1; + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 __spare2[50]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h new file mode 100644 index 000000000000..bb65ee840cda --- /dev/null +++ b/tools/include/uapi/linux/netdev.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NETDEV_H +#define _UAPI_LINUX_NETDEV_H + +#define NETDEV_FAMILY_NAME "netdev" +#define NETDEV_FAMILY_VERSION 1 + +/** + * enum netdev_xdp_act + * @NETDEV_XDP_ACT_BASIC: XDP features set supported by all drivers + * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + * @NETDEV_XDP_ACT_REDIRECT: The netdev supports XDP_REDIRECT + * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements + * ndo_xdp_xmit callback. + * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP + * in zero copy mode. + * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw + * offloading. + * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear + * XDP buffer support in the driver napi callback. + * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements + * non-linear XDP buffer support in ndo_xdp_xmit callback. + */ +enum netdev_xdp_act { + NETDEV_XDP_ACT_BASIC = 1, + NETDEV_XDP_ACT_REDIRECT = 2, + NETDEV_XDP_ACT_NDO_XMIT = 4, + NETDEV_XDP_ACT_XSK_ZEROCOPY = 8, + NETDEV_XDP_ACT_HW_OFFLOAD = 16, + NETDEV_XDP_ACT_RX_SG = 32, + NETDEV_XDP_ACT_NDO_XMIT_SG = 64, + + /* private: */ + NETDEV_XDP_ACT_MASK = 127, +}; + +/** + * enum netdev_xdp_rx_metadata + * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW + * timestamp via bpf_xdp_metadata_rx_timestamp(). + * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet + * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). + */ +enum netdev_xdp_rx_metadata { + NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, + NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, +}; + +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, +}; + +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + +enum netdev_qstats_scope { + NETDEV_QSTATS_SCOPE_QUEUE = 1, +}; + +enum { + NETDEV_A_DEV_IFINDEX = 1, + NETDEV_A_DEV_PAD, + NETDEV_A_DEV_XDP_FEATURES, + NETDEV_A_DEV_XDP_ZC_MAX_SEGS, + NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, + + __NETDEV_A_DEV_MAX, + NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) +}; + +enum { + NETDEV_A_PAGE_POOL_ID = 1, + NETDEV_A_PAGE_POOL_IFINDEX, + NETDEV_A_PAGE_POOL_NAPI_ID, + NETDEV_A_PAGE_POOL_INFLIGHT, + NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + NETDEV_A_PAGE_POOL_DETACH_TIME, + + __NETDEV_A_PAGE_POOL_MAX, + NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) +}; + +enum { + NETDEV_A_PAGE_POOL_STATS_INFO = 1, + NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST = 8, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + + __NETDEV_A_PAGE_POOL_STATS_MAX, + NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) +}; + +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + +enum { + NETDEV_A_QSTATS_IFINDEX = 1, + NETDEV_A_QSTATS_QUEUE_TYPE, + NETDEV_A_QSTATS_QUEUE_ID, + NETDEV_A_QSTATS_SCOPE, + NETDEV_A_QSTATS_RX_PACKETS = 8, + NETDEV_A_QSTATS_RX_BYTES, + NETDEV_A_QSTATS_TX_PACKETS, + NETDEV_A_QSTATS_TX_BYTES, + NETDEV_A_QSTATS_RX_ALLOC_FAIL, + + __NETDEV_A_QSTATS_MAX, + NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) +}; + +enum { + NETDEV_CMD_DEV_GET = 1, + NETDEV_CMD_DEV_ADD_NTF, + NETDEV_CMD_DEV_DEL_NTF, + NETDEV_CMD_DEV_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_GET, + NETDEV_CMD_PAGE_POOL_ADD_NTF, + NETDEV_CMD_PAGE_POOL_DEL_NTF, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, + NETDEV_CMD_QSTATS_GET, + + __NETDEV_CMD_MAX, + NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) +}; + +#define NETDEV_MCGRP_MGMT "mgmt" +#define NETDEV_MCGRP_PAGE_POOL "page-pool" + +#endif /* _UAPI_LINUX_NETDEV_H */ diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h index 58b1eb711360..a5feb7604948 100644 --- a/tools/include/uapi/linux/openat2.h +++ b/tools/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3e5dcdd48a49..3a64499b0f5d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -38,6 +38,21 @@ enum perf_type_id { }; /* + * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA + * AA: hardware event ID + * EEEEEEEE: PMU type ID + * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB + * BB: hardware cache ID + * CC: hardware cache op ID + * DD: hardware cache op result ID + * EEEEEEEE: PMU type ID + * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + */ +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff + +/* * Generalized performance event event_id types, used by the * attr.event_id parameter of the sys_perf_event_open() * syscall: @@ -112,6 +127,7 @@ enum perf_sw_ids { PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_DUMMY = 9, PERF_COUNT_SW_BPF_OUTPUT = 10, + PERF_COUNT_SW_CGROUP_SWITCHES = 11, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -143,12 +159,14 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ - - __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -184,6 +202,10 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -213,6 +235,10 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -231,9 +257,50 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ + PERF_BR_SERROR = 13, /* system error */ + PERF_BR_NO_TX = 14, /* not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* extend ABI */ PERF_BR_MAX, }; +/* + * Common branch speculation outcome classification + */ +enum { + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_MAX, +}; + +enum { + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_MAX, +}; + +enum { + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, +}; + +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -279,6 +346,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -286,6 +354,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -295,8 +364,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ @@ -307,6 +377,8 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -384,7 +456,11 @@ struct perf_event_attr { aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - __reserved_1 : 30; + build_id : 1, /* use build id in mmap2 events */ + inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ + remove_on_exec : 1, /* event is removed from task on exec */ + sigtrap : 1, /* send synchronous SIGTRAP on event */ + __reserved_1 : 26; union { __u32 wakeup_events; /* wakeup every n events */ @@ -436,6 +512,16 @@ struct perf_event_attr { __u16 __reserved_2; __u32 aux_sample_size; __u32 __reserved_3; + + /* + * User provided data if sigtrap=1, passed back to user via + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. + */ + __u64 sig_data; + + __u64 config3; /* extension of config2 */ }; /* @@ -456,7 +542,7 @@ struct perf_event_query_bpf { /* * User provided buffer to store program ids */ - __u32 ids[0]; + __u32 ids[]; }; /* @@ -657,6 +743,22 @@ struct perf_event_mmap_page { __u64 aux_size; }; +/* + * The current state of perf_event_header::misc bits usage: + * ('|' used bit, '-' unused bit) + * + * 012 CDEF + * |||---------|||| + * + * Where: + * 0-2 CPUMODE_MASK + * + * C PROC_MAP_PARSE_TIMEOUT + * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT + * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT + * F (reserved) + */ + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) @@ -688,6 +790,7 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event * * * PERF_RECORD_MISC_EXACT_IP: @@ -697,9 +800,13 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: * Indicates that thread was preempted in TASK_RUNNING state. + * + * PERF_RECORD_MISC_MMAP_BUILD_ID: + * Indicates that mmap2 event carries build id data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) +#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ @@ -879,6 +986,12 @@ enum perf_event_type { * { u64 nr; * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX * { u64 from, to, flags } lbr[nr]; + * # + * # The format of the counters is decided by the + * # "branch_counter_nr" and "branch_counter_width", + * # which are defined in the ABI. + * # + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi @@ -888,7 +1001,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -896,6 +1026,8 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, @@ -911,10 +1043,20 @@ enum perf_event_type { * u64 addr; * u64 len; * u64 pgoff; - * u32 maj; - * u32 min; - * u64 ino; - * u64 ino_generation; + * union { + * struct { + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * }; + * struct { + * u8 build_id_size; + * u8 __reserved_1; + * u16 __reserved_2; + * u8 build_id[20]; + * }; + * }; * u32 prot, flags; * char filename[]; * struct sample_id sample_id; @@ -1060,6 +1202,21 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, + /* + * Data written to the AUX area by hardware due to aux_output, may need + * to be matched to the event by an architecture-specific hardware ID. + * This records the hardware ID, but requires sample_id to provide the + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT + * records from multiple events. + * + * struct { + * struct perf_event_header header; + * u64 hw_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -1101,10 +1258,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) @@ -1123,14 +1285,18 @@ union perf_mem_data_src { mem_lvl_num:4, /* memory hierarchy level number */ mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ - mem_rsvd:24; + mem_blk:3, /* access blocked */ + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:24, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ + mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ mem_lvl_num:4, /* memory hierarchy level number */ @@ -1153,7 +1319,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. + * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ @@ -1177,7 +1349,10 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ @@ -1195,8 +1370,8 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 38 +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ @@ -1213,6 +1388,20 @@ union perf_mem_data_src { #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_SHIFT 26 +/* Access blocked */ +#define PERF_MEM_BLK_NA 0x01 /* not available */ +#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) @@ -1231,6 +1420,7 @@ union perf_mem_data_src { * abort: aborting a hardware transaction * cycles: cycles from last branch (or 0 if not supported) * type: branch type + * spec: branch speculation info (or 0 if not supported) */ struct perf_branch_entry { __u64 from; @@ -1241,7 +1431,32 @@ struct perf_branch_entry { abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ type:4, /* branch type */ - reserved:40; + spec:2, /* branch speculation info */ + new_type:4, /* additional branch type */ + priv:3, /* privilege level */ + reserved:31; +}; + +/* Size of used info bits in struct perf_branch_entry */ +#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 + +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif }; #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h index 12153771396a..bd4b227ab4ba 100644 --- a/tools/include/uapi/linux/pkt_cls.h +++ b/tools/include/uapi/linux/pkt_cls.h @@ -180,7 +180,7 @@ struct tc_u32_sel { short hoff; __be32 hmask; - struct tc_u32_key keys[0]; + struct tc_u32_key keys[]; }; struct tc_u32_mark { @@ -192,7 +192,7 @@ struct tc_u32_mark { struct tc_u32_pcnt { __u64 rcnt; __u64 rhit; - __u64 kcnts[0]; + __u64 kcnts[]; }; /* Flags */ @@ -204,37 +204,6 @@ struct tc_u32_pcnt { #define TC_U32_MAXDEPTH 8 - -/* RSVP filter */ - -enum { - TCA_RSVP_UNSPEC, - TCA_RSVP_CLASSID, - TCA_RSVP_DST, - TCA_RSVP_SRC, - TCA_RSVP_PINFO, - TCA_RSVP_POLICE, - TCA_RSVP_ACT, - __TCA_RSVP_MAX -}; - -#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) - -struct tc_rsvp_gpi { - __u32 key; - __u32 mask; - int offset; -}; - -struct tc_rsvp_pinfo { - struct tc_rsvp_gpi dpi; - struct tc_rsvp_gpi spi; - __u8 protocol; - __u8 tunnelid; - __u8 tunnelhdr; - __u8 pad; -}; - /* ROUTE filter */ enum { @@ -265,22 +234,6 @@ enum { #define TCA_FW_MAX (__TCA_FW_MAX - 1) -/* TC index filter */ - -enum { - TCA_TCINDEX_UNSPEC, - TCA_TCINDEX_HASH, - TCA_TCINDEX_MASK, - TCA_TCINDEX_SHIFT, - TCA_TCINDEX_FALL_THROUGH, - TCA_TCINDEX_CLASSID, - TCA_TCINDEX_POLICE, - TCA_TCINDEX_ACT, - __TCA_TCINDEX_MAX -}; - -#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) - /* Flow filter */ enum { diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc..587481a19433 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -414,6 +414,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; @@ -456,115 +457,6 @@ enum { #define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt { - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt { - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl { - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u16 pad; - __u32 penalty; -}; - -struct tc_cbq_police { - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt { - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats { - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum { - TCA_CBQ_UNSPEC, - TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, - __TCA_CBQ_MAX, -}; - -#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) - -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE, - __TCA_DSMARK_MAX, -}; - -#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) - -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ - __TCA_ATM_MAX, -}; - -#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) - /* Network emulator */ enum { diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 07b4f8131e36..370ed14b1ae0 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -213,6 +213,7 @@ struct prctl_mm_map { /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 # define PR_SPEC_INDIRECT_BRANCH 1 +# define PR_SPEC_L1D_FLUSH 2 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) @@ -233,9 +234,76 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_NONE 0UL +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +/* The control values for the user space selector when dispatch is enabled */ +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 + +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 +# define PR_SCHED_CORE_SCOPE_THREAD 0 +# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 +# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 + +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + +/* Memory deny write / execute */ +#define PR_SET_MDWE 65 +# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) + +#define PR_GET_MDWE 66 + +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + +#define PR_GET_AUXV 0x41555856 + +#define PR_SET_MEMORY_MERGE 67 +#define PR_GET_MEMORY_MERGE 68 + +#define PR_RISCV_V_SET_CONTROL 69 +#define PR_RISCV_V_GET_CONTROL 70 +# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0 +# define PR_RISCV_V_VSTATE_CTRL_OFF 1 +# define PR_RISCV_V_VSTATE_CTRL_ON 2 +# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4) +# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3 +# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc +# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/seccomp.h b/tools/include/uapi/linux/seccomp.h new file mode 100644 index 000000000000..dbfc9b37fcae --- /dev/null +++ b/tools/include/uapi/linux/seccomp.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_SECCOMP_H +#define _UAPI_LINUX_SECCOMP_H + +#include <linux/compiler.h> +#include <linux/types.h> + + +/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */ +#define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */ +#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ +#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ + +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 +#define SECCOMP_GET_NOTIF_SIZES 3 + +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) +/* Received notifications wait in killable state (only respond to fatal signals) */ +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) + +/* + * All BPF programs must return a 32-bit value. + * The bottom 16-bits are for optional return data. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). + * + * The ordering ensures that a min_t() over composed return values always + * selects the least permissive choice. + */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +/** + * struct seccomp_data - the format the BPF program executes over. + * @nr: the system call number + * @arch: indicates system call convention as an AUDIT_ARCH_* value + * as defined in <linux/audit.h>. + * @instruction_pointer: at the time of the system call. + * @args: up to 6 system call arguments always stored as 64-bit values + * regardless of the architecture. + */ +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; + +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +/* + * Valid flags for struct seccomp_notif_resp + * + * Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution! + * If set by the process supervising the syscalls of another process the + * syscall will continue. This is problematic because of an inherent TOCTOU. + * An attacker can exploit the time while the supervised process is waiting on + * a response from the supervising process to rewrite syscall arguments which + * are passed as pointers of the intercepted syscall. + * It should be absolutely clear that this means that the seccomp notifier + * _cannot_ be used to implement a security policy! It should only ever be used + * in scenarios where a more privileged process supervises the syscalls of a + * lesser privileged process to get around kernel-enforced security + * restrictions when the privileged process deems this safe. In other words, + * in order to continue a syscall the supervising process should be sure that + * another security mechanism or the kernel itself will sufficiently block + * syscalls if arguments are rewritten to something unsafe. + * + * Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF + * or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the + * same syscall, the most recently added filter takes precedence. This means + * that the new SECCOMP_RET_USER_NOTIF filter can override any + * SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all + * such filtered syscalls to be executed by sending the response + * SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally + * be overriden by SECCOMP_USER_NOTIF_FLAG_CONTINUE. + */ +#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0) + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ + +/** + * struct seccomp_notif_addfd + * @id: The ID of the seccomp notification + * @flags: SECCOMP_ADDFD_FLAG_* + * @srcfd: The local fd number + * @newfd: Optional remote FD number if SETFD option is set, otherwise 0. + * @newfd_flags: The O_* flags the remote FD should have applied + */ +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) + +#endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h index 286e8d6a8e98..f94baf154c47 100644 --- a/tools/include/uapi/linux/seg6.h +++ b/tools/include/uapi/linux/seg6.h @@ -30,7 +30,7 @@ struct ipv6_sr_hdr { __u8 flags; __u16 tag; - struct in6_addr segments[0]; + struct in6_addr segments[]; }; #define SR6_FLAG1_PROTECTED (1 << 6) @@ -49,7 +49,7 @@ struct ipv6_sr_hdr { struct sr6_tlv { __u8 type; __u8 len; - __u8 data[0]; + __u8 data[]; }; #endif diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 82cc58fe9368..2f2ee82d5517 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -124,7 +124,8 @@ struct statx { __u32 stx_dev_minor; /* 0x90 */ __u64 stx_mnt_id; - __u64 __spare2; + __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ /* 0xa0 */ __u64 __spare3[12]; /* Spare space for future expansion */ /* 0x100 */ @@ -152,6 +153,8 @@ struct statx { #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ +#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -171,9 +174,12 @@ struct statx { * be of use to ordinary userspace programs such as GUIs or ls rather than * specialised tools. * - * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags * semantically. Where possible, the numerical value is picked to correspond - * also. + * also. Note that the DAX attribute indicates that the file is in the CPU + * direct access state. It does not correspond to the per-inode flag that + * some filesystems support. + * */ #define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ #define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ @@ -183,7 +189,7 @@ struct statx { #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ +#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h new file mode 100644 index 000000000000..bb6ea517efb5 --- /dev/null +++ b/tools/include/uapi/linux/stddef.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_STDDEF_H +#define _LINUX_STDDEF_H + + + +#ifndef __always_inline +#define __always_inline __inline__ +#endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } +#endif diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h index 653c4f94f76e..fe6c8f8f3e8c 100644 --- a/tools/include/uapi/linux/tc_act/tc_bpf.h +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -1,11 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_TC_BPF_H diff --git a/tools/include/uapi/linux/tcp.h b/tools/include/uapi/linux/tcp.h new file mode 100644 index 000000000000..13ceeb395eb8 --- /dev/null +++ b/tools/include/uapi/linux/tcp.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol. + * + * Version: @(#)tcp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _UAPI_LINUX_TCP_H +#define _UAPI_LINUX_TCP_H + +#include <linux/types.h> +#include <asm/byteorder.h> +#include <linux/socket.h> + +struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __be16 window; + __sum16 check; + __be16 urg_ptr; +}; + +/* + * The union cast uses a gcc extension to avoid aliasing problems + * (union is compatible to any of its members) + * This means this part of the code is -fstrict-aliasing safe now. + */ +union tcp_word_hdr { + struct tcphdr hdr; + __be32 words[5]; +}; + +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) + +enum { + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) +}; + +/* + * TCP general constants + */ +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + +/* TCP socket options */ +#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ +#define TCP_MAXSEG 2 /* Limit MSS */ +#define TCP_CORK 3 /* Never send partially complete segments */ +#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ +#define TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define TCP_INFO 11 /* Information about this connection. */ +#define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#define TCP_CONGESTION 13 /* Congestion control algorithm */ +#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ +#define TCP_REPAIR_QUEUE 20 +#define TCP_QUEUE_SEQ 21 +#define TCP_REPAIR_OPTIONS 22 +#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 +#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ +#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ +#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ +#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ +#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ +#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ +#define TCP_ZEROCOPY_RECEIVE 35 +#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ + +#define TCP_CM_INQ TCP_INQ + +#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ + + +#define TCP_REPAIR_ON 1 +#define TCP_REPAIR_OFF 0 +#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ + +struct tcp_repair_opt { + __u32 opt_code; + __u32 opt_val; +}; + +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + +enum { + TCP_NO_QUEUE, + TCP_RECV_QUEUE, + TCP_SEND_QUEUE, + TCP_QUEUES_NR, +}; + +/* why fastopen failed from client perspective */ +enum tcp_fastopen_client_fail { + TFO_STATUS_UNSPEC, /* catch-all */ + TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */ + TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */ + TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */ +}; + +/* for TCP_INFO socket option */ +#define TCPI_OPT_TIMESTAMPS 1 +#define TCPI_OPT_SACK 2 +#define TCPI_OPT_WSCALE 4 +#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ +#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ +#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ + +/* + * Sender's congestion state indicating normal or abnormal situations + * in the last round of packets sent. The state is driven by the ACK + * information and timer events. + */ +enum tcp_ca_state { + /* + * Nothing bad has been observed recently. + * No apparent reordering, packet loss, or ECN marks. + */ + TCP_CA_Open = 0, +#define TCPF_CA_Open (1<<TCP_CA_Open) + /* + * The sender enters disordered state when it has received DUPACKs or + * SACKs in the last round of packets sent. This could be due to packet + * loss or reordering but needs further information to confirm packets + * have been lost. + */ + TCP_CA_Disorder = 1, +#define TCPF_CA_Disorder (1<<TCP_CA_Disorder) + /* + * The sender enters Congestion Window Reduction (CWR) state when it + * has received ACKs with ECN-ECE marks, or has experienced congestion + * or packet discard on the sender host (e.g. qdisc). + */ + TCP_CA_CWR = 2, +#define TCPF_CA_CWR (1<<TCP_CA_CWR) + /* + * The sender is in fast recovery and retransmitting lost packets, + * typically triggered by ACK events. + */ + TCP_CA_Recovery = 3, +#define TCPF_CA_Recovery (1<<TCP_CA_Recovery) + /* + * The sender is in loss recovery triggered by retransmission timeout. + */ + TCP_CA_Loss = 4 +#define TCPF_CA_Loss (1<<TCP_CA_Loss) +}; + +struct tcp_info { + __u8 tcpi_state; + __u8 tcpi_ca_state; + __u8 tcpi_retransmits; + __u8 tcpi_probes; + __u8 tcpi_backoff; + __u8 tcpi_options; + __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_delivery_rate_app_limited:1, tcpi_fastopen_client_fail:2; + + __u32 tcpi_rto; + __u32 tcpi_ato; + __u32 tcpi_snd_mss; + __u32 tcpi_rcv_mss; + + __u32 tcpi_unacked; + __u32 tcpi_sacked; + __u32 tcpi_lost; + __u32 tcpi_retrans; + __u32 tcpi_fackets; + + /* Times. */ + __u32 tcpi_last_data_sent; + __u32 tcpi_last_ack_sent; /* Not remembered, sorry. */ + __u32 tcpi_last_data_recv; + __u32 tcpi_last_ack_recv; + + /* Metrics. */ + __u32 tcpi_pmtu; + __u32 tcpi_rcv_ssthresh; + __u32 tcpi_rtt; + __u32 tcpi_rttvar; + __u32 tcpi_snd_ssthresh; + __u32 tcpi_snd_cwnd; + __u32 tcpi_advmss; + __u32 tcpi_reordering; + + __u32 tcpi_rcv_rtt; + __u32 tcpi_rcv_space; + + __u32 tcpi_total_retrans; + + __u64 tcpi_pacing_rate; + __u64 tcpi_max_pacing_rate; + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ + + __u32 tcpi_notsent_bytes; + __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ + + __u64 tcpi_delivery_rate; + + __u64 tcpi_busy_time; /* Time (usec) busy sending data */ + __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ + __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ + + __u32 tcpi_delivered; + __u32 tcpi_delivered_ce; + + __u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ + __u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */ + __u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */ + __u32 tcpi_reord_seen; /* reordering events seen */ + + __u32 tcpi_rcv_ooopack; /* Out-of-order packets received */ + + __u32 tcpi_snd_wnd; /* peer's advertised receive window after + * scaling (bytes) + */ +}; + +/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ +enum { + TCP_NLA_PAD, + TCP_NLA_BUSY, /* Time (usec) busy sending data */ + TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */ + TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ + TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */ + TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */ + TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */ + TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */ + TCP_NLA_SND_CWND, /* Sending congestion window */ + TCP_NLA_REORDERING, /* Reordering metric */ + TCP_NLA_MIN_RTT, /* minimum RTT */ + TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ + TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ + TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ + TCP_NLA_CA_STATE, /* ca_state of socket */ + TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ + TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */ + TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ + TCP_NLA_BYTES_SENT, /* Data bytes sent including retransmission */ + TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ + TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ + TCP_NLA_REORD_SEEN, /* reordering events seen */ + TCP_NLA_SRTT, /* smoothed RTT in usecs */ + TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ + TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ + TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ +}; + +/* for TCP_MD5SIG socket option */ +#define TCP_MD5SIG_MAXKEYLEN 80 + +/* tcp_md5sig extension flags for TCP_MD5SIG_EXT */ +#define TCP_MD5SIG_FLAG_PREFIX 0x1 /* address prefix length */ +#define TCP_MD5SIG_FLAG_IFINDEX 0x2 /* ifindex set */ + +struct tcp_md5sig { + struct __kernel_sockaddr_storage tcpm_addr; /* address associated */ + __u8 tcpm_flags; /* extension flags */ + __u8 tcpm_prefixlen; /* address prefix */ + __u16 tcpm_keylen; /* key length */ + int tcpm_ifindex; /* device index for scope */ + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ +}; + +/* INET_DIAG_MD5SIG */ +struct tcp_diag_md5sig { + __u8 tcpm_family; + __u8 tcpm_prefixlen; + __u16 tcpm_keylen; + __be32 tcpm_addr[4]; + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; +}; + +/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ + +#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 +struct tcp_zerocopy_receive { + __u64 address; /* in: address of mapping */ + __u32 length; /* in/out: number of bytes to map/mapped */ + __u32 recv_skip_hint; /* out: amount of bytes to skip */ + __u32 inq; /* out: amount of bytes in read queue */ + __s32 err; /* out: socket error */ + __u64 copybuf_address; /* in: copybuf address (small reads) */ + __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ + __u32 flags; /* in: flags */ +}; +#endif /* _UAPI_LINUX_TCP_H */ diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h index cf525cddeb94..74a84e02422a 100644 --- a/tools/include/uapi/linux/usbdevice_fs.h +++ b/tools/include/uapi/linux/usbdevice_fs.h @@ -131,7 +131,7 @@ struct usbdevfs_urb { unsigned int signr; /* signal to be sent on completion, or 0 if none should be sent. */ void __user *usercontext; - struct usbdevfs_iso_packet_desc iso_frame_desc[0]; + struct usbdevfs_iso_packet_desc iso_frame_desc[]; }; /* ioctls for talking directly to drivers */ @@ -176,7 +176,7 @@ struct usbdevfs_disconnect_claim { struct usbdevfs_streams { unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */ unsigned int num_eps; - unsigned char eps[0]; + unsigned char eps[]; }; /* diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 75232185324a..649560c685f1 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -45,6 +45,25 @@ #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) /* Specify an eventfd file descriptor to signal on log write. */ #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) +/* By default, a device gets one vhost_worker that its virtqueues share. This + * command allows the owner of the device to create an additional vhost_worker + * for the device. It can later be bound to 1 or more of its virtqueues using + * the VHOST_ATTACH_VRING_WORKER command. + * + * This must be called after VHOST_SET_OWNER and the caller must be the owner + * of the device. The new thread will inherit caller's cgroups and namespaces, + * and will share the caller's memory space. The new thread will also be + * counted against the caller's RLIMIT_NPROC value. + * + * The worker's ID used in other commands will be returned in + * vhost_worker_state. + */ +#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state) +/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any + * virtqueue. If userspace is not able to call this for workers its created, + * the kernel will free all the device's workers when the device is closed. + */ +#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state) /* Ring setup. */ /* Set number of descriptors in ring. This parameter can not @@ -70,6 +89,18 @@ #define VHOST_VRING_BIG_ENDIAN 1 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) +/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's + * virtqueues. + * + * This will replace the virtqueue's existing worker. If the replaced worker + * is no longer attached to any virtqueues, it can be freed with + * VHOST_FREE_WORKER. + */ +#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \ + struct vhost_vring_worker) +/* Return the vring worker's ID */ +#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \ + struct vhost_vring_worker) /* The following ioctls use eventfd file descriptors to signal and poll * for events. */ @@ -89,11 +120,6 @@ /* Set or get vhost backend capability */ -/* Use message type V2 */ -#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 -/* IOTLB can accept batching hints */ -#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 - #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) @@ -146,4 +172,59 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + +/* Get the number of address spaces. */ +#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +/* Get the group for a virtqueue: read index, write group in num, + * The virtqueue index is stored in the index field of + * vhost_vring_state. The group for this specific virtqueue is + * returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ + struct vhost_vring_state) +/* Set the ASID for a virtqueue group. The group index is stored in + * the index field of vhost_vring_state, the ASID associated with this + * group is stored at num field of vhost_vring_state. + */ +#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ + struct vhost_vring_state) + +/* Suspend a device so it does not process virtqueue requests anymore + * + * After the return of ioctl the device must preserve all the necessary state + * (the virtqueue vring base plus the possible device specific states) that is + * required for restoring in the future. The device must not change its + * configuration after that point. + */ +#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) + +/* Resume a device so it can resume processing virtqueue requests + * + * After the return of this ioctl the device will have restored all the + * necessary states and it is fully operational to continue processing the + * virtqueue descriptors. + */ +#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) + +/* Get the group for the descriptor table including driver & device areas + * of a virtqueue: read index, write group in num. + * The virtqueue index is stored in the index field of vhost_vring_state. + * The group ID of the descriptor table for this specific virtqueue + * is returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) #endif diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 535a7229e1d9..d5b9cfbd9cea 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -3,22 +3,6 @@ * Advanced Linux Sound Architecture - ALSA - Driver * Copyright (c) 1994-2003 by Jaroslav Kysela <perex@perex.cz>, * Abramo Bagnara <abramo@alsa-project.org> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #ifndef _UAPI__SOUND_ASOUND_H @@ -56,8 +40,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ @@ -156,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 15) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -202,6 +188,11 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ +/* + * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the + * available bit count in most significant bit. It's for the case of so-called 'left-justified' or + * `right-padding` sample which has less width than 32 bit. + */ #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) @@ -276,13 +267,17 @@ typedef int __bitwise snd_pcm_format_t; typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_SUBFORMAT_STD ((__force snd_pcm_subformat_t) 0) -#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_STD +#define SNDRV_PCM_SUBFORMAT_MSBITS_MAX ((__force snd_pcm_subformat_t) 1) +#define SNDRV_PCM_SUBFORMAT_MSBITS_20 ((__force snd_pcm_subformat_t) 2) +#define SNDRV_PCM_SUBFORMAT_MSBITS_24 ((__force snd_pcm_subformat_t) 3) +#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_MSBITS_24 #define SNDRV_PCM_INFO_MMAP 0x00000001 /* hardware supports mmap */ #define SNDRV_PCM_INFO_MMAP_VALID 0x00000002 /* period data are valid during transfer */ #define SNDRV_PCM_INFO_DOUBLE 0x00000004 /* Double buffering needed for PCM start/stop */ #define SNDRV_PCM_INFO_BATCH 0x00000010 /* double buffering */ #define SNDRV_PCM_INFO_SYNC_APPLPTR 0x00000020 /* need the explicit sync of appl_ptr update */ +#define SNDRV_PCM_INFO_PERFECT_DRAIN 0x00000040 /* silencing at the end of stream is not required */ #define SNDRV_PCM_INFO_INTERLEAVED 0x00000100 /* channels are interleaved */ #define SNDRV_PCM_INFO_NONINTERLEAVED 0x00000200 /* channels are not interleaved */ #define SNDRV_PCM_INFO_COMPLEX 0x00000400 /* complex frame organization (mmap only) */ @@ -299,7 +294,8 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */ #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ - +#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ +#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ @@ -391,6 +387,9 @@ typedef int snd_pcm_hw_param_t; #define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0) /* avoid rate resampling */ #define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER (1<<1) /* export buffer */ #define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP (1<<2) /* disable period wakeups */ +#define SNDRV_PCM_HW_PARAMS_NO_DRAIN_SILENCE (1<<3) /* suppress drain with the filling + * of the silence samples + */ struct snd_interval { unsigned int min, max; @@ -437,9 +436,14 @@ struct snd_pcm_sw_params { snd_pcm_uframes_t avail_min; /* min avail frames for wakeup */ snd_pcm_uframes_t xfer_align; /* obsolete: xfer size need to be a multiple */ snd_pcm_uframes_t start_threshold; /* min hw_avail frames for automatic start */ - snd_pcm_uframes_t stop_threshold; /* min avail frames for automatic stop */ - snd_pcm_uframes_t silence_threshold; /* min distance from noise for silence filling */ - snd_pcm_uframes_t silence_size; /* silence block size */ + /* + * The following two thresholds alleviate playback buffer underruns; when + * hw_avail drops below the threshold, the respective action is triggered: + */ + snd_pcm_uframes_t stop_threshold; /* - stop playback */ + snd_pcm_uframes_t silence_threshold; /* - pre-fill buffer with silence */ + snd_pcm_uframes_t silence_size; /* max size of silence pre-fill; when >= boundary, + * fill played area with silence immediately */ snd_pcm_uframes_t boundary; /* pointers wrap point */ unsigned int proto; /* protocol version */ unsigned int tstamp_type; /* timestamp type (req. proto >= 2.0.12) */ @@ -578,7 +582,8 @@ struct __snd_pcm_mmap_status64 { struct __snd_pcm_mmap_control64 { __pad_before_uframe __pad1; snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */ - __pad_before_uframe __pad2; + __pad_before_uframe __pad2; // This should be __pad_after_uframe, but binary + // backwards compatibility constraints prevent a fix. __pad_before_uframe __pad3; snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */ @@ -710,7 +715,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 1) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -721,6 +726,7 @@ enum { #define SNDRV_RAWMIDI_INFO_OUTPUT 0x00000001 #define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 +#define SNDRV_RAWMIDI_INFO_UMP 0x00000008 struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ @@ -736,12 +742,38 @@ struct snd_rawmidi_info { unsigned char reserved[64]; /* reserved for future use */ }; +#define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) +#define SNDRV_RAWMIDI_MODE_FRAMING_SHIFT 0 +#define SNDRV_RAWMIDI_MODE_FRAMING_NONE (0<<0) +#define SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP (1<<0) +#define SNDRV_RAWMIDI_MODE_CLOCK_MASK (7<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_SHIFT 3 +#define SNDRV_RAWMIDI_MODE_CLOCK_NONE (0<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_REALTIME (1<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC (2<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW (3<<3) + +#define SNDRV_RAWMIDI_FRAMING_DATA_LENGTH 16 + +struct snd_rawmidi_framing_tstamp { + /* For now, frame_type is always 0. Midi 2.0 is expected to add new + * types here. Applications are expected to skip unknown frame types. + */ + __u8 frame_type; + __u8 length; /* number of valid bytes in data field */ + __u8 reserved[2]; + __u32 tv_nsec; /* nanoseconds */ + __u64 tv_sec; /* seconds */ + __u8 data[SNDRV_RAWMIDI_FRAMING_DATA_LENGTH]; +} __packed; + struct snd_rawmidi_params { int stream; size_t buffer_size; /* queue size in bytes */ size_t avail_min; /* minimum avail bytes for wakeup */ unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */ - unsigned char reserved[16]; /* reserved for future use */ + unsigned int mode; /* For input data only, frame incoming data */ + unsigned char reserved[12]; /* reserved for future use */ }; #ifndef __KERNEL__ @@ -755,12 +787,82 @@ struct snd_rawmidi_status { }; #endif +/* UMP EP info flags */ +#define SNDRV_UMP_EP_INFO_STATIC_BLOCKS 0x01 + +/* UMP EP Protocol / JRTS capability bits */ +#define SNDRV_UMP_EP_INFO_PROTO_MIDI_MASK 0x0300 +#define SNDRV_UMP_EP_INFO_PROTO_MIDI1 0x0100 /* MIDI 1.0 */ +#define SNDRV_UMP_EP_INFO_PROTO_MIDI2 0x0200 /* MIDI 2.0 */ +#define SNDRV_UMP_EP_INFO_PROTO_JRTS_MASK 0x0003 +#define SNDRV_UMP_EP_INFO_PROTO_JRTS_TX 0x0001 /* JRTS Transmit */ +#define SNDRV_UMP_EP_INFO_PROTO_JRTS_RX 0x0002 /* JRTS Receive */ + +/* UMP Endpoint information */ +struct snd_ump_endpoint_info { + int card; /* card number */ + int device; /* device number */ + unsigned int flags; /* additional info */ + unsigned int protocol_caps; /* protocol capabilities */ + unsigned int protocol; /* current protocol */ + unsigned int num_blocks; /* # of function blocks */ + unsigned short version; /* UMP major/minor version */ + unsigned short family_id; /* MIDI device family ID */ + unsigned short model_id; /* MIDI family model ID */ + unsigned int manufacturer_id; /* MIDI manufacturer ID */ + unsigned char sw_revision[4]; /* software revision */ + unsigned short padding; + unsigned char name[128]; /* endpoint name string */ + unsigned char product_id[128]; /* unique product id string */ + unsigned char reserved[32]; +} __packed; + +/* UMP direction */ +#define SNDRV_UMP_DIR_INPUT 0x01 +#define SNDRV_UMP_DIR_OUTPUT 0x02 +#define SNDRV_UMP_DIR_BIDIRECTION 0x03 + +/* UMP block info flags */ +#define SNDRV_UMP_BLOCK_IS_MIDI1 (1U << 0) /* MIDI 1.0 port w/o restrict */ +#define SNDRV_UMP_BLOCK_IS_LOWSPEED (1U << 1) /* 31.25Kbps B/W MIDI1 port */ + +/* UMP block user-interface hint */ +#define SNDRV_UMP_BLOCK_UI_HINT_UNKNOWN 0x00 +#define SNDRV_UMP_BLOCK_UI_HINT_RECEIVER 0x01 +#define SNDRV_UMP_BLOCK_UI_HINT_SENDER 0x02 +#define SNDRV_UMP_BLOCK_UI_HINT_BOTH 0x03 + +/* UMP groups and blocks */ +#define SNDRV_UMP_MAX_GROUPS 16 +#define SNDRV_UMP_MAX_BLOCKS 32 + +/* UMP Block information */ +struct snd_ump_block_info { + int card; /* card number */ + int device; /* device number */ + unsigned char block_id; /* block ID (R/W) */ + unsigned char direction; /* UMP direction */ + unsigned char active; /* Activeness */ + unsigned char first_group; /* first group ID */ + unsigned char num_groups; /* number of groups */ + unsigned char midi_ci_version; /* MIDI-CI support version */ + unsigned char sysex8_streams; /* max number of sysex8 streams */ + unsigned char ui_hint; /* user interface hint */ + unsigned int flags; /* various info flags */ + unsigned char name[128]; /* block name string */ + unsigned char reserved[32]; +} __packed; + #define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) #define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) +#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int) #define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params) #define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status) #define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int) #define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int) +/* Additional ioctls for UMP rawmidi devices */ +#define SNDRV_UMP_IOCTL_ENDPOINT_INFO _IOR('W', 0x40, struct snd_ump_endpoint_info) +#define SNDRV_UMP_IOCTL_BLOCK_INFO _IOR('W', 0x41, struct snd_ump_block_info) /* * Timer section - /dev/snd/timer @@ -936,7 +1038,7 @@ struct snd_timer_tread { * * ****************************************************************************/ -#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) +#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 9) struct snd_ctl_card_info { int card; /* card number */ @@ -974,7 +1076,7 @@ typedef int __bitwise snd_ctl_elem_iface_t; #define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) #define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) #define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ -// (1 << 3) is unused. +/* (1 << 3) is unused. */ #define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) @@ -1071,7 +1173,7 @@ struct snd_ctl_elem_value { struct snd_ctl_tlv { unsigned int numid; /* control element numeric identification */ unsigned int length; /* in bytes aligned to 4 */ - unsigned int tlv[0]; /* first TLV */ + unsigned int tlv[]; /* first TLV */ }; #define SNDRV_CTL_IOCTL_PVERSION _IOR('U', 0x00, int) @@ -1097,6 +1199,9 @@ struct snd_ctl_tlv { #define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int) #define SNDRV_CTL_IOCTL_RAWMIDI_INFO _IOWR('U', 0x41, struct snd_rawmidi_info) #define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int) +#define SNDRV_CTL_IOCTL_UMP_NEXT_DEVICE _IOWR('U', 0x43, int) +#define SNDRV_CTL_IOCTL_UMP_ENDPOINT_INFO _IOWR('U', 0x44, struct snd_ump_endpoint_info) +#define SNDRV_CTL_IOCTL_UMP_BLOCK_INFO _IOWR('U', 0x45, struct snd_ump_block_info) #define SNDRV_CTL_IOCTL_POWER _IOWR('U', 0xd0, int) #define SNDRV_CTL_IOCTL_POWER_STATE _IOR('U', 0xd1, int) diff --git a/tools/include/vdso/bits.h b/tools/include/vdso/bits.h index 6d005a1f5d94..388b212088ea 100644 --- a/tools/include/vdso/bits.h +++ b/tools/include/vdso/bits.h @@ -5,5 +5,6 @@ #include <vdso/const.h> #define BIT(nr) (UL(1) << (nr)) +#define BIT_ULL(nr) (ULL(1) << (nr)) #endif /* __VDSO_BITS_H */ |