aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/skmsg.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/skmsg.c')
-rw-r--r--net/core/skmsg.c126
1 files changed, 68 insertions, 58 deletions
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 8eb671c827f9..e6b9ced3eda8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce)
{
struct page_frag *pfrag = sk_page_frag(sk);
+ u32 osize = msg->sg.size;
int ret = 0;
len -= msg->sg.size;
@@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
u32 orig_offset;
int use, i;
- if (!sk_page_frag_refill(sk, pfrag))
- return -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
orig_offset = pfrag->offset;
use = min_t(int, len, pfrag->size - orig_offset);
- if (!sk_wmem_schedule(sk, use))
- return -ENOMEM;
+ if (!sk_wmem_schedule(sk, use)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
i = msg->sg.end;
sk_msg_iter_var_prev(i);
@@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
}
return ret;
+
+msg_trim:
+ sk_msg_trim(sk, msg, osize);
+ return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);
@@ -315,14 +324,13 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
goto out;
}
- copied = iov_iter_get_pages(from, pages, bytes, maxpages,
+ copied = iov_iter_get_pages2(from, pages, bytes, maxpages,
&offset);
if (copied <= 0) {
ret = -EFAULT;
goto out;
}
- iov_iter_advance(from, copied);
bytes -= copied;
msg->sg.size += copied;
@@ -426,8 +434,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
if (copied + copy > len)
copy = len - copied;
copy = copy_page_to_iter(page, sge->offset, copy, iter);
- if (!copy)
- return copied ? copied : -EFAULT;
+ if (!copy) {
+ copied = copied ? copied : -EFAULT;
+ goto out;
+ }
copied += copy;
if (likely(!peek)) {
@@ -447,13 +457,13 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
* didn't copy the entire length lets just break.
*/
if (copy != sge->length)
- return copied;
+ goto out;
sk_msg_iter_var_next(i);
}
if (copied == len)
break;
- } while (i != msg_rx->sg.end);
+ } while ((i != msg_rx->sg.end) && !sg_is_last(sge));
if (unlikely(peek)) {
msg_rx = sk_psock_next_msg(psock, msg_rx);
@@ -463,13 +473,15 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
msg_rx->sg.start = i;
- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+ if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) {
msg_rx = sk_psock_dequeue_msg(psock);
kfree_sk_msg(msg_rx);
}
msg_rx = sk_psock_peek_msg(psock);
}
-
+out:
+ if (psock->work_state.skb && copied > 0)
+ schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -488,23 +500,27 @@ bool sk_msg_is_readable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);
-static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
- struct sk_buff *skb)
+static struct sk_msg *alloc_sk_msg(gfp_t gfp)
{
struct sk_msg *msg;
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN);
+ if (unlikely(!msg))
return NULL;
+ sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
+ return msg;
+}
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
- if (unlikely(!msg))
+ if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- sk_msg_init(msg);
- return msg;
+ return alloc_sk_msg(GFP_KERNEL);
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -515,16 +531,20 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
{
int num_sge, copied;
- /* skb linearize may fail with ENOMEM, but lets simply try again
- * later if this happens. Under memory pressure we don't want to
- * drop the skb. We need to linearize the skb so that the mapping
- * in skb_to_sgvec can not error.
- */
- if (skb_linearize(skb))
- return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
- if (unlikely(num_sge < 0))
- return num_sge;
+ if (num_sge < 0) {
+ /* skb linearize may fail with ENOMEM, but lets simply try again
+ * later if this happens. Under memory pressure we don't want to
+ * drop the skb. We need to linearize the skb so that the mapping
+ * in skb_to_sgvec can not error.
+ */
+ if (skb_linearize(skb))
+ return -EAGAIN;
+
+ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
+ if (unlikely(num_sge < 0))
+ return num_sge;
+ }
copied = len;
msg->sg.start = 0;
@@ -577,13 +597,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{
- struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
int err;
if (unlikely(!msg))
return -EAGAIN;
- sk_msg_init(msg);
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
@@ -686,6 +705,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
write_lock_bh(&sk->sk_callback_lock);
+ if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
@@ -702,6 +726,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
psock->eval = __SK_NONE;
psock->sk_proto = prot;
psock->saved_unhash = prot->unhash;
+ psock->saved_destroy = prot->destroy;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
@@ -717,7 +742,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
refcount_set(&psock->refcnt, 1);
- rcu_assign_sk_user_data_nocopy(sk, psock);
+ __rcu_assign_sk_user_data_with_flags(sk, psock,
+ SK_USER_DATA_NOCOPY |
+ SK_USER_DATA_PSOCK);
sock_hold(sk);
out:
@@ -776,16 +803,13 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
}
}
-void sk_psock_stop(struct sk_psock *psock, bool wait)
+void sk_psock_stop(struct sk_psock *psock)
{
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
__sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
-
- if (wait)
- cancel_work_sync(&psock->work);
}
static void sk_psock_done_strp(struct sk_psock *psock);
@@ -823,7 +847,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
- sk_psock_stop(psock, false);
+ sk_psock_stop(psock);
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
queue_rcu_work(system_wq, &psock->rwork);
@@ -1146,21 +1170,14 @@ static void sk_psock_done_strp(struct sk_psock *psock)
}
#endif /* CONFIG_BPF_STREAM_PARSER */
-static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
- unsigned int offset, size_t orig_len)
+static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *)desc->arg.data;
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
int len = skb->len;
- /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb) {
- desc->error = -ENOMEM;
- return 0;
- }
+ skb_get(skb);
rcu_read_lock();
psock = sk_psock(sk);
@@ -1173,15 +1190,14 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
if (!prog)
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
- skb->sk = sk;
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
- skb->sk = NULL;
}
- if (sk_psock_verdict_apply(psock, skb, ret) < 0)
- len = 0;
+ ret = sk_psock_verdict_apply(psock, skb, ret);
+ if (ret < 0)
+ len = ret;
out:
rcu_read_unlock();
return len;
@@ -1190,16 +1206,10 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- read_descriptor_t desc;
- if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
-
- desc.arg.data = sk;
- desc.error = 0;
- desc.count = 1;
-
- sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+ sock->ops->read_skb(sk, sk_psock_verdict_recv);
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)