aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-07-29 12:18:51 +0100
committerDavid S. Miller <davem@davemloft.net>2021-07-29 12:18:51 +0100
commit8cb79af5c63ff2dbcab048085302256dd7e8208d (patch)
tree3fd1eae06a0bb5286063d8aed35c11a51db5c531
parentDocumentation: networking: add ioam6-sysctl into index (diff)
parentveth: use skb_prepare_for_gro() (diff)
downloadlinux-dev-8cb79af5c63ff2dbcab048085302256dd7e8208d.tar.xz
linux-dev-8cb79af5c63ff2dbcab048085302256dd7e8208d.zip
Merge branch 'skb-gro-optimize'
Paolo Abeni says: ==================== sk_buff: optimize GRO for the common case This is a trimmed down revision of "sk_buff: optimize layout for GRO", specifically dropping the changes to the sk_buff layout[1]. This series tries to accomplish 2 goals: - optimize the GRO stage for the most common scenario, avoiding a bunch of conditional and some more code - let owned skbs entering the GRO engine, allowing backpressure in the veth GRO forward path. A new sk_buff flag (!!!) is introduced and maintained for GRO's sake. Such field uses an existing hole, so there is no change to the sk_buff size. [1] two main reasons: - move skb->inner_ field requires some extra care, as some in kernel users access and the fields regardless of skb->encapsulation. - extending secmark size clash with ct and nft uAPIs address the all above is possible, I think, but for sure not in a single series. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/veth.c2
-rw-r--r--include/linux/skbuff.h6
-rw-r--r--include/net/dst.h2
-rw-r--r--include/net/sock.h9
-rw-r--r--net/core/dev.c32
-rw-r--r--net/core/skbuff.c27
6 files changed, 62 insertions, 16 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 381670c08ba7..50eb43e5bf45 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -713,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
int mac_len, delta, off;
struct xdp_buff xdp;
- skb_orphan_partial(skb);
+ skb_prepare_for_gro(skb);
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f19190820e63..b1e5bbfcc926 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t;
* CHECKSUM_UNNECESSARY (max 3)
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
+ * @slow_gro: state present at GRO time, slower prepare step required
* @napi_id: id of the NAPI struct this skb came from
* @sender_cpu: (aka @napi_id) source CPU in XPS
* @secmark: security marking
@@ -870,6 +871,7 @@ struct sk_buff {
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
#endif
+ __u8 slow_gro:1;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
*/
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
+ skb->slow_gro |= !!dst;
skb->_skb_refdst = (unsigned long)dst;
}
@@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ skb->slow_gro = !!dst;
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
@@ -4216,6 +4220,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ skb->slow_gro |= !!nfct;
skb->_nfct = nfct;
#endif
}
@@ -4375,6 +4380,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb_nfct(dst));
#endif
+ dst->slow_gro = src->slow_gro;
__nf_copy(dst, src, true);
}
diff --git a/include/net/dst.h b/include/net/dst.h
index 75b1e734e9c2..a057319aabef 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)
static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
+ nskb->slow_gro |= !!refdst;
nskb->_skb_refdst = refdst;
if (!(nskb->_skb_refdst & SKB_DST_NOREF))
dst_clone(skb_dst(nskb));
@@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
dst = NULL;
skb->_skb_refdst = (unsigned long)dst;
+ skb->slow_gro |= !!dst;
}
return skb->_skb_refdst != 0UL;
diff --git a/include/net/sock.h b/include/net/sock.h
index f23cb259b0e2..ff1be7e7e90b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2249,6 +2249,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
return false;
}
+static inline void skb_prepare_for_gro(struct sk_buff *skb)
+{
+ if (skb->destructor != sock_wfree) {
+ skb_orphan(skb);
+ return;
+ }
+ skb->slow_gro = 1;
+}
+
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
unsigned long expires);
diff --git a/net/core/dev.c b/net/core/dev.c
index fb5d12a3d52d..dcc87fcd64ba 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6014,7 +6014,6 @@ static void gro_list_prepare(const struct list_head *head,
diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
if (skb_vlan_tag_present(p))
diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
- diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
@@ -6024,17 +6023,30 @@ static void gro_list_prepare(const struct list_head *head,
skb_mac_header(skb),
maclen);
- diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+ /* in most common scenarions _state is 0
+ * otherwise we are already on some slower paths
+ * either skip all the infrequent tests altogether or
+ * avoid trying too hard to skip each of them individually
+ */
+ if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *skb_ext;
+ struct tc_skb_ext *p_ext;
+#endif
+
+ diffs |= p->sk != skb->sk;
+ diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+
#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (!diffs) {
- struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
- struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
+ skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+ p_ext = skb_ext_find(p, TC_SKB_EXT);
diffs |= (!!p_ext) ^ (!!skb_ext);
if (!diffs && unlikely(skb_ext))
diffs |= p_ext->chain ^ skb_ext->chain;
- }
#endif
+ }
NAPI_GRO_CB(p)->same_flow = !diffs;
}
@@ -6299,8 +6311,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
- skb_ext_reset(skb);
- nf_reset_ct(skb);
+ if (unlikely(skb->slow_gro)) {
+ skb_orphan(skb);
+ skb_ext_reset(skb);
+ nf_reset_ct(skb);
+ skb->slow_gro = 0;
+ }
napi->skb = skb;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fc7942c0dddc..fcbd977186b0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -954,9 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb)
void napi_skb_free_stolen_head(struct sk_buff *skb)
{
- nf_reset_ct(skb);
- skb_dst_drop(skb);
- skb_ext_put(skb);
+ if (unlikely(skb->slow_gro)) {
+ nf_reset_ct(skb);
+ skb_dst_drop(skb);
+ skb_ext_put(skb);
+ skb_orphan(skb);
+ skb->slow_gro = 0;
+ }
napi_skb_cache_put(skb);
}
@@ -3889,6 +3893,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->last = skb;
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
+
+ /* sk owenrship - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
@@ -4256,6 +4263,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb);
unsigned int delta_truesize;
+ unsigned int new_truesize;
struct sk_buff *lp;
if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
@@ -4287,10 +4295,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
skb_frag_size_sub(frag, offset);
/* all fragments truesize : remove (head size + sk_buff) */
- delta_truesize = skb->truesize -
- SKB_TRUESIZE(skb_end_offset(skb));
+ new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ delta_truesize = skb->truesize - new_truesize;
- skb->truesize -= skb->data_len;
+ skb->truesize = new_truesize;
skb->len -= skb->data_len;
skb->data_len = 0;
@@ -4319,12 +4327,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
/* We dont need to clear skbinfo->nr_frags here */
- delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff));
+ delta_truesize = skb->truesize - new_truesize;
+ skb->truesize = new_truesize;
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
merge:
+ /* sk owenrship - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
@@ -6449,6 +6461,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
new->chunks = newlen;
new->offset[id] = newoff;
set_active:
+ skb->slow_gro = 1;
skb->extensions = new;
skb->active_extensions |= 1 << id;
return skb_ext_get_ptr(new, id);