/* SPDX-License-Identifier: GPL-2.0 */ /* XDP user-space ring structure * Copyright(c) 2018 Intel Corporation. */ #ifndef _LINUX_XSK_QUEUE_H #define _LINUX_XSK_QUEUE_H #include #include #include #define RX_BATCH_SIZE 16 struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; u32 flags; }; /* Used for the RX and TX queues for packets */ struct xdp_rxtx_ring { struct xdp_ring ptrs; struct xdp_desc desc[0] ____cacheline_aligned_in_smp; }; /* Used for the fill and completion queues for buffers */ struct xdp_umem_ring { struct xdp_ring ptrs; u64 desc[0] ____cacheline_aligned_in_smp; }; struct xsk_queue { u64 chunk_mask; u64 size; u32 ring_mask; u32 nentries; u32 cached_prod; u32 cons_head; u32 cons_tail; struct xdp_ring *ring; u64 invalid_descs; }; /* The structure of the shared state of the rings are the same as the * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion * ring, the kernel is the producer and user space is the consumer. For * the Tx and fill rings, the kernel is the consumer and user space is * the producer. * * producer consumer * * if (LOAD ->consumer) { LOAD ->producer * (A) smp_rmb() (C) * STORE $data LOAD $data * smp_wmb() (B) smp_mb() (D) * STORE ->producer STORE ->consumer * } * * (A) pairs with (D), and (B) pairs with (C). * * Starting with (B), it protects the data from being written after * the producer pointer. If this barrier was missing, the consumer * could observe the producer pointer being set and thus load the data * before the producer has written the new data. The consumer would in * this case load the old data. * * (C) protects the consumer from speculatively loading the data before * the producer pointer actually has been read. If we do not have this * barrier, some architectures could load old data as speculative loads * are not discarded as the CPU does not know there is a dependency * between ->producer and data. * * (A) is a control dependency that separates the load of ->consumer * from the stores of $data. In case ->consumer indicates there is no * room in the buffer to store $data we do not. So no barrier is needed. * * (D) protects the load of the data to be observed to happen after the * store of the consumer pointer. If we did not have this memory * barrier, the producer could observe the consumer pointer being set * and overwrite the data with a new value before the consumer got the * chance to read the old value. The consumer would thus miss reading * the old entry and very likely read the new entry twice, once right * now and again after circling through the ring. */ /* Common functions operating for both RXTX and umem queues */ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) { return q ? q->invalid_descs : 0; } static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt) { u32 entries = q->cached_prod - q->cons_tail; if (entries == 0) { /* Refresh the local pointer */ q->cached_prod = READ_ONCE(q->ring->producer); entries = q->cached_prod - q->cons_tail; } return (entries > dcnt) ? dcnt : entries; } static inline u32 xskq_nb_free(struct xsk_queue *q, u32 dcnt) { u32 free_entries = q->nentries - (q->cached_prod - q->cons_tail); if (free_entries >= dcnt) return free_entries; /* Refresh the local tail pointer */ q->cons_tail = READ_ONCE(q->ring->consumer); return q->nentries - (q->cached_prod - q->cons_tail); } static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt) { u32 entries = q->cached_prod - q->cons_tail; if (entries >= cnt) return true; /* Refresh the local pointer. */ q->cached_prod = READ_ONCE(q->ring->producer); entries = q->cached_prod - q->cons_tail; return entries >= cnt; } /* UMEM queue */ static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr, u64 length) { bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; bool next_pg_contig = (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & XSK_NEXT_PG_CONTIG_MASK; return cross_pg && !next_pg_contig; } static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) { if (addr >= q->size) { q->invalid_descs++; return false; } return true; } static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr, u64 length, struct xdp_umem *umem) { u64 base_addr = xsk_umem_extract_addr(addr); addr = xsk_umem_add_offset_to_addr(addr); if (base_addr >= q->size || addr >= q->size || xskq_crosses_non_contig_pg(umem, addr, length)) { q->invalid_descs++; return false; } return true; } static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr, struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask; if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { if (xskq_is_valid_addr_unaligned(q, *addr, umem->chunk_size_nohr, umem)) return addr; goto out; } if (xskq_is_valid_addr(q, *addr)) return addr; out: q->cons_tail++; } return NULL; } static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr, struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ WRITE_ONCE(q->ring->consumer, q->cons_tail); q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); /* Order consumer and data */ smp_rmb(); } return xskq_validate_addr(q, addr, umem); } static inline void xskq_discard_addr(struct xsk_queue *q) { q->cons_tail++; } static inline int xskq_prod_reserve(struct xsk_queue *q) { if (xskq_nb_free(q, 1) == 0) return -ENOSPC; /* A, matches D */ q->cached_prod++; return 0; } static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; if (xskq_nb_free(q, 1) == 0) return -ENOSPC; /* A, matches D */ ring->desc[q->cached_prod++ & q->ring_mask] = addr; return 0; } static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) { /* Order producer and data */ smp_wmb(); /* B, matches C */ WRITE_ONCE(q->ring->producer, idx); } static inline void xskq_prod_submit(struct xsk_queue *q) { __xskq_prod_submit(q, q->cached_prod); } static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; u32 idx = q->ring->producer; ring->desc[idx++ & q->ring_mask] = addr; __xskq_prod_submit(q, idx); } static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) { __xskq_prod_submit(q, q->ring->producer + nb_entries); } /* Rx/Tx queue */ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, struct xdp_umem *umem) { if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem)) return false; if (d->len > umem->chunk_size_nohr || d->options) { q->invalid_descs++; return false; } return true; } if (!xskq_is_valid_addr(q, d->addr)) return false; if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || d->options) { q->invalid_descs++; return false; } return true; } static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, struct xdp_desc *desc, struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *desc = READ_ONCE(ring->desc[idx]); if (xskq_is_valid_desc(q, desc, umem)) return desc; q->cons_tail++; } return NULL; } static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, struct xdp_desc *desc, struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ WRITE_ONCE(q->ring->consumer, q->cons_tail); q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); /* Order consumer and data */ smp_rmb(); /* C, matches B */ } return xskq_validate_desc(q, desc, umem); } static inline void xskq_discard_desc(struct xsk_queue *q) { q->cons_tail++; } static inline int xskq_prod_reserve_desc(struct xsk_queue *q, u64 addr, u32 len) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx; if (xskq_nb_free(q, 1) == 0) return -ENOSPC; /* A, matches D */ idx = q->cached_prod++ & q->ring_mask; ring->desc[idx].addr = addr; ring->desc[idx].len = len; return 0; } static inline bool xskq_full_desc(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == q->nentries; } static inline bool xskq_prod_is_empty(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); } void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask); struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); void xskq_destroy(struct xsk_queue *q_ops); /* Executed by the core when the entire UMEM gets freed */ void xsk_reuseq_destroy(struct xdp_umem *umem); #endif /* _LINUX_XSK_QUEUE_H */