/* SPDX-License-Identifier: GPL-2.0 */ /* XDP user-space ring structure * Copyright(c) 2018 Intel Corporation. */ #ifndef _LINUX_XSK_QUEUE_H #define _LINUX_XSK_QUEUE_H #include #include #include struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; u32 flags; }; /* Used for the RX and TX queues for packets */ struct xdp_rxtx_ring { struct xdp_ring ptrs; struct xdp_desc desc[0] ____cacheline_aligned_in_smp; }; /* Used for the fill and completion queues for buffers */ struct xdp_umem_ring { struct xdp_ring ptrs; u64 desc[0] ____cacheline_aligned_in_smp; }; struct xsk_queue { u64 chunk_mask; u64 size; u32 ring_mask; u32 nentries; u32 cached_prod; u32 cached_cons; struct xdp_ring *ring; u64 invalid_descs; }; /* The structure of the shared state of the rings are the same as the * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion * ring, the kernel is the producer and user space is the consumer. For * the Tx and fill rings, the kernel is the consumer and user space is * the producer. * * producer consumer * * if (LOAD ->consumer) { LOAD ->producer * (A) smp_rmb() (C) * STORE $data LOAD $data * smp_wmb() (B) smp_mb() (D) * STORE ->producer STORE ->consumer * } * * (A) pairs with (D), and (B) pairs with (C). * * Starting with (B), it protects the data from being written after * the producer pointer. If this barrier was missing, the consumer * could observe the producer pointer being set and thus load the data * before the producer has written the new data. The consumer would in * this case load the old data. * * (C) protects the consumer from speculatively loading the data before * the producer pointer actually has been read. If we do not have this * barrier, some architectures could load old data as speculative loads * are not discarded as the CPU does not know there is a dependency * between ->producer and data. * * (A) is a control dependency that separates the load of ->consumer * from the stores of $data. In case ->consumer indicates there is no * room in the buffer to store $data we do not. So no barrier is needed. * * (D) protects the load of the data to be observed to happen after the * store of the consumer pointer. If we did not have this memory * barrier, the producer could observe the consumer pointer being set * and overwrite the data with a new value before the consumer got the * chance to read the old value. The consumer would thus miss reading * the old entry and very likely read the new entry twice, once right * now and again after circling through the ring. */ /* The operations on the rings are the following: * * producer consumer * * RESERVE entries PEEK in the ring for entries * WRITE data into the ring READ data from the ring * SUBMIT entries RELEASE entries * * The producer reserves one or more entries in the ring. It can then * fill in these entries and finally submit them so that they can be * seen and read by the consumer. * * The consumer peeks into the ring to see if the producer has written * any new entries. If so, the producer can then read these entries * and when it is done reading them release them back to the producer * so that the producer can use these slots to fill in new entries. * * The function names below reflect these operations. */ /* Functions that read and validate content from consumer rings. */ static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr, u64 length) { bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; bool next_pg_contig = (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & XSK_NEXT_PG_CONTIG_MASK; return cross_pg && !next_pg_contig; } static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q, u64 addr, u64 length, struct xdp_umem *umem) { u64 base_addr = xsk_umem_extract_addr(addr); addr = xsk_umem_add_offset_to_addr(addr); if (base_addr >= q->size || addr >= q->size || xskq_cons_crosses_non_contig_pg(umem, addr, length)) { q->invalid_descs++; return false; } return true; } static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr) { if (addr >= q->size) { q->invalid_descs++; return false; } return true; } static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr, struct xdp_umem *umem) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; while (q->cached_cons != q->cached_prod) { u32 idx = q->cached_cons & q->ring_mask; *addr = ring->desc[idx] & q->chunk_mask; if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { if (xskq_cons_is_valid_unaligned(q, *addr, umem->chunk_size_nohr, umem)) return true; goto out; } if (xskq_cons_is_valid_addr(q, *addr)) return true; out: q->cached_cons++; } return false; } static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, struct xdp_umem *umem) { if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem)) return false; if (d->len > umem->chunk_size_nohr || d->options) { q->invalid_descs++; return false; } return true; } if (!xskq_cons_is_valid_addr(q, d->addr)) return false; if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || d->options) { q->invalid_descs++; return false; } return true; } static inline bool xskq_cons_read_desc(struct xsk_queue *q, struct xdp_desc *desc, struct xdp_umem *umem) { while (q->cached_cons != q->cached_prod) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx = q->cached_cons & q->ring_mask; *desc = ring->desc[idx]; if (xskq_cons_is_valid_desc(q, desc, umem)) return true; q->cached_cons++; } return false; } /* Functions for consumers */ static inline void __xskq_cons_release(struct xsk_queue *q) { smp_mb(); /* D, matches A */ WRITE_ONCE(q->ring->consumer, q->cached_cons); } static inline void __xskq_cons_peek(struct xsk_queue *q) { /* Refresh the local pointer */ q->cached_prod = READ_ONCE(q->ring->producer); smp_rmb(); /* C, matches B */ } static inline void xskq_cons_get_entries(struct xsk_queue *q) { __xskq_cons_release(q); __xskq_cons_peek(q); } static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) { u32 entries = q->cached_prod - q->cached_cons; if (entries >= cnt) return true; __xskq_cons_peek(q); entries = q->cached_prod - q->cached_cons; return entries >= cnt; } static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr, struct xdp_umem *umem) { if (q->cached_prod == q->cached_cons) xskq_cons_get_entries(q); return xskq_cons_read_addr(q, addr, umem); } static inline bool xskq_cons_peek_desc(struct xsk_queue *q, struct xdp_desc *desc, struct xdp_umem *umem) { if (q->cached_prod == q->cached_cons) xskq_cons_get_entries(q); return xskq_cons_read_desc(q, desc, umem); } static inline void xskq_cons_release(struct xsk_queue *q) { /* To improve performance, only update local state here. * Reflect this to global state when we get new entries * from the ring in xskq_cons_get_entries(). */ q->cached_cons++; } static inline bool xskq_cons_is_full(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == q->nentries; } /* Functions for producers */ static inline bool xskq_prod_is_full(struct xsk_queue *q) { u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); if (free_entries) return false; /* Refresh the local tail pointer */ q->cached_cons = READ_ONCE(q->ring->consumer); free_entries = q->nentries - (q->cached_prod - q->cached_cons); return !free_entries; } static inline int xskq_prod_reserve(struct xsk_queue *q) { if (xskq_prod_is_full(q)) return -ENOSPC; /* A, matches D */ q->cached_prod++; return 0; } static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; if (xskq_prod_is_full(q)) return -ENOSPC; /* A, matches D */ ring->desc[q->cached_prod++ & q->ring_mask] = addr; return 0; } static inline int xskq_prod_reserve_desc(struct xsk_queue *q, u64 addr, u32 len) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx; if (xskq_prod_is_full(q)) return -ENOSPC; /* A, matches D */ idx = q->cached_prod++ & q->ring_mask; ring->desc[idx].addr = addr; ring->desc[idx].len = len; return 0; } static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) { smp_wmb(); /* B, matches C */ WRITE_ONCE(q->ring->producer, idx); } static inline void xskq_prod_submit(struct xsk_queue *q) { __xskq_prod_submit(q, q->cached_prod); } static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; u32 idx = q->ring->producer; ring->desc[idx++ & q->ring_mask] = addr; __xskq_prod_submit(q, idx); } static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) { __xskq_prod_submit(q, q->ring->producer + nb_entries); } static inline bool xskq_prod_is_empty(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); } /* For both producers and consumers */ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) { return q ? q->invalid_descs : 0; } void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask); struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); void xskq_destroy(struct xsk_queue *q_ops); /* Executed by the core when the entire UMEM gets freed */ void xsk_reuseq_destroy(struct xdp_umem *umem); #endif /* _LINUX_XSK_QUEUE_H */