diff options
-rw-r--r-- | include/net/sock.h | 20 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 2 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 234 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 15 | ||||
-rw-r--r-- | net/sched/em_meta.c | 2 |
6 files changed, 120 insertions, 155 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index ff4e62aa62e5..4f0280ad6c23 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1210,6 +1210,8 @@ struct proto { unsigned int inuse_idx; #endif + int (*forward_alloc_get)(const struct sock *sk); + bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*stream_memory_read)(const struct sock *sk); /* Memory pressure */ @@ -1217,6 +1219,7 @@ struct proto { void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ + /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. @@ -1294,6 +1297,14 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); +static inline int sk_forward_alloc_get(const struct sock *sk) +{ + if (!sk->sk_prot->forward_alloc_get) + return sk->sk_forward_alloc; + + return sk->sk_prot->forward_alloc_get(sk); +} + static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) @@ -1573,6 +1584,11 @@ static inline void sk_mem_charge(struct sock *sk, int size) sk->sk_forward_alloc -= size; } +/* the following macros control memory reclaiming in sk_mem_uncharge() + */ +#define SK_RECLAIM_THRESHOLD (1 << 21) +#define SK_RECLAIM_CHUNK (1 << 20) + static inline void sk_mem_uncharge(struct sock *sk, int size) { int reclaimable; @@ -1589,8 +1605,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) * If we reach 2 MBytes, reclaim 1 MBytes right now, there is * no need to hold that much forward allocation anyway. */ - if (unlikely(reclaimable >= 1 << 21)) - __sk_mem_reclaim(sk, 1 << 20); + if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) + __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); } static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 31d5cefa9979..0189e3cd4a7d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk->sk_forward_alloc); + WARN_ON(sk_forward_alloc_get(sk)); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ef7897226f08..c8fa6e7f7d12 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -271,7 +271,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), - .idiag_fmem = sk->sk_forward_alloc, + .idiag_fmem = sk_forward_alloc_get(sk), .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cd6b11c9b54d..eb316bd578bb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -126,6 +126,11 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } +static void mptcp_rmem_charge(struct sock *sk, int size) +{ + mptcp_sk(sk)->rmem_fwd_alloc -= size; +} + static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, struct sk_buff *from) { @@ -142,7 +147,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq; kfree_skb_partial(from, fragstolen); atomic_add(delta, &sk->sk_rmem_alloc); - sk_mem_charge(sk, delta); + mptcp_rmem_charge(sk, delta); return true; } @@ -155,6 +160,44 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, return mptcp_try_coalesce((struct sock *)msk, to, from); } +static void __mptcp_rmem_reclaim(struct sock *sk, int amount) +{ + amount >>= SK_MEM_QUANTUM_SHIFT; + mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + __sk_mem_reduce_allocated(sk, amount); +} + +static void mptcp_rmem_uncharge(struct sock *sk, int size) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + int reclaimable; + + msk->rmem_fwd_alloc += size; + reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); + + /* see sk_mem_uncharge() for the rationale behind the following schema */ + if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) + __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK); +} + +static void mptcp_rfree(struct sk_buff *skb) +{ + unsigned int len = skb->truesize; + struct sock *sk = skb->sk; + + atomic_sub(len, &sk->sk_rmem_alloc); + mptcp_rmem_uncharge(sk, len); +} + +static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = mptcp_rfree; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + mptcp_rmem_charge(sk, skb->truesize); +} + /* "inspired" by tcp_data_queue_ofo(), main differences: * - use mptcp seqs * - don't cope with sacks @@ -267,7 +310,29 @@ merge_right: end: skb_condense(skb); - skb_set_owner_r(skb, sk); + mptcp_set_owner_r(skb, sk); +} + +static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + int amt, amount; + + if (size < msk->rmem_fwd_alloc) + return true; + + amt = sk_mem_pages(size); + amount = amt << SK_MEM_QUANTUM_SHIFT; + msk->rmem_fwd_alloc += amount; + if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) { + if (ssk->sk_forward_alloc < amount) { + msk->rmem_fwd_alloc -= amount; + return false; + } + + ssk->sk_forward_alloc -= amount; + } + return true; } static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, @@ -285,15 +350,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT; - - if (ssk->sk_forward_alloc < amount) - goto drop; - - ssk->sk_forward_alloc -= amount; - sk->sk_forward_alloc += amount; - } + if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) + goto drop; has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -313,7 +371,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (tail && mptcp_try_coalesce(sk, tail, skb)) return true; - skb_set_owner_r(skb, sk); + mptcp_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); return true; } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { @@ -908,122 +966,20 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, df->data_seq + df->data_len == msk->write_seq; } -static int mptcp_wmem_with_overhead(int size) -{ - return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT); -} - -static void __mptcp_wmem_reserve(struct sock *sk, int size) -{ - int amount = mptcp_wmem_with_overhead(size); - struct mptcp_sock *msk = mptcp_sk(sk); - - WARN_ON_ONCE(msk->wmem_reserved); - if (WARN_ON_ONCE(amount < 0)) - amount = 0; - - if (amount <= sk->sk_forward_alloc) - goto reserve; - - /* under memory pressure try to reserve at most a single page - * otherwise try to reserve the full estimate and fallback - * to a single page before entering the error path - */ - if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) || - !sk_wmem_schedule(sk, amount)) { - if (amount <= PAGE_SIZE) - goto nomem; - - amount = PAGE_SIZE; - if (!sk_wmem_schedule(sk, amount)) - goto nomem; - } - -reserve: - msk->wmem_reserved = amount; - sk->sk_forward_alloc -= amount; - return; - -nomem: - /* we will wait for memory on next allocation */ - msk->wmem_reserved = -1; -} - -static void __mptcp_update_wmem(struct sock *sk) +static void __mptcp_mem_reclaim_partial(struct sock *sk) { - struct mptcp_sock *msk = mptcp_sk(sk); + int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk); lockdep_assert_held_once(&sk->sk_lock.slock); - if (!msk->wmem_reserved) - return; - - if (msk->wmem_reserved < 0) - msk->wmem_reserved = 0; - if (msk->wmem_reserved > 0) { - sk->sk_forward_alloc += msk->wmem_reserved; - msk->wmem_reserved = 0; - } -} - -static bool mptcp_wmem_alloc(struct sock *sk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - /* check for pre-existing error condition */ - if (msk->wmem_reserved < 0) - return false; - - if (msk->wmem_reserved >= size) - goto account; - - mptcp_data_lock(sk); - if (!sk_wmem_schedule(sk, size)) { - mptcp_data_unlock(sk); - return false; - } - - sk->sk_forward_alloc -= size; - msk->wmem_reserved += size; - mptcp_data_unlock(sk); - -account: - msk->wmem_reserved -= size; - return true; -} - -static void mptcp_wmem_uncharge(struct sock *sk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - if (msk->wmem_reserved < 0) - msk->wmem_reserved = 0; - msk->wmem_reserved += size; -} - -static void __mptcp_mem_reclaim_partial(struct sock *sk) -{ - lockdep_assert_held_once(&sk->sk_lock.slock); - __mptcp_update_wmem(sk); + __mptcp_rmem_reclaim(sk, reclaimable - 1); sk_mem_reclaim_partial(sk); } static void mptcp_mem_reclaim_partial(struct sock *sk) { - struct mptcp_sock *msk = mptcp_sk(sk); - - /* if we are experiencing a transint allocation error, - * the forward allocation memory has been already - * released - */ - if (msk->wmem_reserved < 0) - return; - mptcp_data_lock(sk); - sk->sk_forward_alloc += msk->wmem_reserved; - sk_mem_reclaim_partial(sk); - msk->wmem_reserved = sk->sk_forward_alloc; - sk->sk_forward_alloc = 0; + __mptcp_mem_reclaim_partial(sk); mptcp_data_unlock(sk); } @@ -1513,8 +1469,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) return NULL; } -static void mptcp_push_release(struct sock *sk, struct sock *ssk, - struct mptcp_sendmsg_info *info) +static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info) { tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal); release_sock(ssk); @@ -1577,7 +1532,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) * the last round, release prev_ssk */ if (ssk != prev_ssk && prev_ssk) - mptcp_push_release(sk, prev_ssk, &info); + mptcp_push_release(prev_ssk, &info); if (!ssk) goto out; @@ -1590,7 +1545,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) { - mptcp_push_release(sk, ssk, &info); + mptcp_push_release(ssk, &info); goto out; } @@ -1605,7 +1560,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) /* at this point we held the socket lock for the last subflow we used */ if (ssk) - mptcp_push_release(sk, ssk, &info); + mptcp_push_release(ssk, &info); out: /* ensure the rtx timer is running */ @@ -1664,7 +1619,6 @@ out: /* __mptcp_alloc_tx_skb could have released some wmem and we are * not going to flush it via release_sock() */ - __mptcp_update_wmem(sk); if (copied) { tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); @@ -1701,7 +1655,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* silently ignore everything else */ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; - mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len))); + lock_sock(sk); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -1749,17 +1703,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) psize = min_t(size_t, psize, msg_data_left(msg)); total_ts = psize + frag_truesize; - if (!mptcp_wmem_alloc(sk, total_ts)) + if (!sk_wmem_schedule(sk, total_ts)) goto wait_for_memory; if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { - mptcp_wmem_uncharge(sk, psize + frag_truesize); ret = -EFAULT; goto out; } /* data successfully copied into the write queue */ + sk->sk_forward_alloc -= total_ts; copied += psize; dfrag->data_len += psize; frag_truesize += psize; @@ -1956,7 +1910,7 @@ static void __mptcp_update_rmem(struct sock *sk) return; atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); - sk_mem_uncharge(sk, msk->rmem_released); + mptcp_rmem_uncharge(sk, msk->rmem_released); WRITE_ONCE(msk->rmem_released, 0); } @@ -2024,7 +1978,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); - mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk)); + lock_sock(sk); if (unlikely(sk->sk_state == TCP_LISTEN)) { copied = -ENOTCONN; goto out_err; @@ -2504,7 +2458,7 @@ static int __mptcp_init_sock(struct sock *sk) __skb_queue_head_init(&msk->receive_queue); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; - msk->wmem_reserved = 0; + msk->rmem_fwd_alloc = 0; WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; @@ -2715,7 +2669,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk->sk_prot->destroy(sk); - WARN_ON_ONCE(msk->wmem_reserved); + WARN_ON_ONCE(msk->rmem_fwd_alloc); WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); @@ -2948,8 +2902,14 @@ void mptcp_destroy_common(struct mptcp_sock *msk) /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); - + __skb_queue_purge(&sk->sk_receive_queue); skb_rbtree_purge(&msk->out_of_order_queue); + + /* move all the rx fwd alloc into the sk_mem_reclaim_final in + * inet_sock_destruct() will dispose it + */ + sk->sk_forward_alloc += msk->rmem_fwd_alloc; + msk->rmem_fwd_alloc = 0; mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); } @@ -3031,10 +2991,6 @@ static void mptcp_release_cb(struct sock *sk) if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); - /* push_pending may touch wmem_reserved, ensure we do the cleanup - * later - */ - __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); } @@ -3184,6 +3140,11 @@ static void mptcp_shutdown(struct sock *sk, int how) __mptcp_wr_shutdown(sk); } +static int mptcp_forward_alloc_get(const struct sock *sk) +{ + return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc; +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3201,6 +3162,7 @@ static struct proto mptcp_prot = { .hash = mptcp_hash, .unhash = mptcp_unhash, .get_port = mptcp_get_port, + .forward_alloc_get = mptcp_forward_alloc_get, .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 284fdcec067e..67a61ac48b20 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -227,7 +227,7 @@ struct mptcp_sock { u64 ack_seq; u64 rcv_wnd_sent; u64 rcv_data_fin_seq; - int wmem_reserved; + int rmem_fwd_alloc; struct sock *last_snd; int snd_burst; int old_wspace; @@ -272,19 +272,6 @@ struct mptcp_sock { char ca_name[TCP_CA_NAME_MAX]; }; -#define mptcp_lock_sock(___sk, cb) do { \ - struct sock *__sk = (___sk); /* silence macro reuse warning */ \ - might_sleep(); \ - spin_lock_bh(&__sk->sk_lock.slock); \ - if (__sk->sk_lock.owned) \ - __lock_sock(__sk); \ - cb; \ - __sk->sk_lock.owned = 1; \ - spin_unlock(&__sk->sk_lock.slock); \ - mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \ - local_bh_enable(); \ -} while (0) - #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 46254968d390..0a04468b7314 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -457,7 +457,7 @@ META_COLLECTOR(int_sk_fwd_alloc) *err = -1; return; } - dst->value = sk->sk_forward_alloc; + dst->value = sk_forward_alloc_get(sk); } META_COLLECTOR(int_sk_sndbuf) |