diff options
Diffstat (limited to 'net')
103 files changed, 2719 insertions, 790 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 990b9fde28c6..f00bb57f0f60 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -88,12 +88,11 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER - if (vlan->netpoll) - netpoll_send_skb(vlan->netpoll, skb); + return netpoll_send_skb(vlan->netpoll, skb); #else BUG(); -#endif return NETDEV_TX_OK; +#endif } static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, @@ -489,6 +488,25 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + +static void vlan_dev_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *unused) +{ + lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key); +} + +static void vlan_dev_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .parse = eth_header_parse, @@ -579,6 +597,8 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); + vlan_dev_set_lockdep_class(dev); + vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) return -ENOMEM; diff --git a/net/Kconfig b/net/Kconfig index c5ba2d180c43..5c524c6ee75d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -455,6 +455,7 @@ config FAILOVER config ETHTOOL_NETLINK bool "Netlink interface for ethtool" default y + depends on PHYLIB=y || PHYLIB=n help An alternative userspace interface for ethtool based on generic netlink. It provides better extensibility and some new features, diff --git a/net/atm/common.c b/net/atm/common.c index 0ce530af534d..8575f5d52087 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -177,18 +177,18 @@ static void vcc_destroy_socket(struct sock *sk) set_bit(ATM_VF_CLOSE, &vcc->flags); clear_bit(ATM_VF_READY, &vcc->flags); - if (vcc->dev) { - if (vcc->dev->ops->close) - vcc->dev->ops->close(vcc); - if (vcc->push) - vcc->push(vcc, NULL); /* atmarpd has no push */ - module_put(vcc->owner); - - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - } + if (vcc->dev && vcc->dev->ops->close) + vcc->dev->ops->close(vcc); + if (vcc->push) + vcc->push(vcc, NULL); /* atmarpd has no push */ + module_put(vcc->owner); + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + atm_return(vcc, skb->truesize); + kfree_skb(skb); + } + if (vcc->dev && vcc->dev->ops->owner) { module_put(vcc->dev->ops->owner); atm_dev_put(vcc->dev); } diff --git a/net/atm/lec.c b/net/atm/lec.c index 25fa3a7b72bd..ca37f5a71f5e 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1264,6 +1264,12 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry) entry->vcc = NULL; } if (entry->recv_vcc) { + struct atm_vcc *vcc = entry->recv_vcc; + struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); + + kfree(vpriv); + vcc->user_back = NULL; + entry->recv_vcc->push = entry->old_recv_push; vcc_release_async(entry->recv_vcc, -EPIPE); entry->recv_vcc = NULL; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 0959d32be65c..18028b9f95f0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -893,7 +893,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig); if (!orig_node) - return; + goto out; neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming, ethhdr->h_source); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 8f0717c3f7b5..b0469d15da0e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1009,15 +1009,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, */ static u8 batadv_nc_random_weight_tq(u8 tq) { - u8 rand_val, rand_tq; - - get_random_bytes(&rand_val, sizeof(rand_val)); - /* randomize the estimated packet loss (max TQ - estimated TQ) */ - rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq); - - /* normalize the randomized packet loss */ - rand_tq /= BATADV_TQ_MAX_VALUE; + u8 rand_tq = prandom_u32_max(BATADV_TQ_MAX_VALUE + 1 - tq); /* convert to (randomized) estimated tq again */ return BATADV_TQ_MAX_VALUE - rand_tq; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5f05a728f347..822af540b854 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -739,6 +739,34 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, return 0; } +/* batman-adv network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key batadv_netdev_xmit_lock_key; + +/** + * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue + * @dev: device which owns the tx queue + * @txq: tx queue to modify + * @_unused: always NULL + */ +static void batadv_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); +} + +/** + * batadv_set_lockdep_class() - Set txq and addr_list lockdep class + * @dev: network device to modify + */ +static void batadv_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); +} + /** * batadv_softif_init_late() - late stage initialization of soft interface * @dev: registered network device to modify @@ -752,6 +780,8 @@ static int batadv_softif_init_late(struct net_device *dev) int ret; size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; + batadv_set_lockdep_class(dev); + bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index c45962d8527b..0f962dcd239e 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1150,7 +1150,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, ret = batadv_parse_throughput(net_dev, buff, "throughput_override", &tp_override); if (!ret) - return count; + goto out; old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override); if (old_tp_override == tp_override) @@ -1190,6 +1190,7 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj, tp_override = atomic_read(&hard_iface->bat_v.throughput_override); + batadv_hardif_put(hard_iface); return sprintf(buff, "%u.%u MBit\n", tp_override / 10, tp_override % 10); } diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 4febc82a7c76..bb55d92691b0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -571,7 +571,15 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) return err < 0 ? NET_XMIT_DROP : err; } +static int bt_dev_init(struct net_device *dev) +{ + netdev_lockdep_set_classes(dev); + + return 0; +} + static const struct net_device_ops netdev_ops = { + .ndo_init = bt_dev_init, .ndo_start_xmit = bt_xmit, }; diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 9e25c6570170..1d6d243cdde9 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -135,4 +135,11 @@ config BT_SELFTEST_SMP Run test cases for SMP cryptographic functionality, including both legacy SMP as well as the Secure Connections features. +config BT_FEATURE_DEBUG + bool "Enable runtime option for debugging statements" + depends on BT && !DYNAMIC_DEBUG + help + This provides an option to enable/disable debugging statements + at runtime via the experimental features interface. + source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 51d399273276..dbe2d79f233f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3350,10 +3350,12 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, */ ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT); - /* Only configure whitelist if disconnect succeeded */ - if (!ret) + /* Only configure whitelist if disconnect succeeded and wake + * isn't being prevented. + */ + if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) ret = hci_change_suspend_state(hdev, - BT_SUSPEND_COMPLETE); + BT_SUSPEND_CONFIGURE_WAKE); } else if (action == PM_POST_SUSPEND) { ret = hci_change_suspend_state(hdev, BT_RUNNING); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 966fc543c01d..73aabca0064b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -42,12 +42,27 @@ /* Handle HCI Event packets */ -static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb, + u8 *new_status) { __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); + /* It is possible that we receive Inquiry Complete event right + * before we receive Inquiry Cancel Command Complete event, in + * which case the latter event should have status of Command + * Disallowed (0x0c). This should not be treated as error, since + * we actually achieve what Inquiry Cancel wants to achieve, + * which is to end the last Inquiry session. + */ + if (status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) { + bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command"); + status = 0x00; + } + + *new_status = status; + if (status) return; @@ -3233,7 +3248,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, switch (*opcode) { case HCI_OP_INQUIRY_CANCEL: - hci_cc_inquiry_cancel(hdev, skb); + hci_cc_inquiry_cancel(hdev, skb, status); break; case HCI_OP_PERIODIC_INQ: @@ -5288,7 +5303,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, /* Most controller will fail if we try to create new connections * while we have an existing one in slave role. */ - if (hdev->conn_hash.le_num_slave > 0) + if (hdev->conn_hash.le_num_slave > 0 && + (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) || + !(hdev->le_states[3] & 0x10))) return NULL; /* If we're not connectable only connect devices that we have in diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9ea40106ef17..1fc55685da62 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -35,7 +35,7 @@ #define HCI_REQ_CANCELED 2 #define LE_SUSPEND_SCAN_WINDOW 0x0012 -#define LE_SUSPEND_SCAN_INTERVAL 0x0060 +#define LE_SUSPEND_SCAN_INTERVAL 0x0400 void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { @@ -890,7 +890,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 own_addr_type; u8 filter_policy; - u8 window, interval; + u16 window, interval; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); @@ -1090,7 +1090,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) disconnect_counter); set_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks); } - } else if (next == BT_SUSPEND_COMPLETE) { + } else if (next == BT_SUSPEND_CONFIGURE_WAKE) { /* Unpause to take care of updating scanning params */ hdev->scanning_paused = false; /* Enable event filter for paired devices */ @@ -1447,7 +1447,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); hdev->scan_rsp_data_len = len; - cp.handle = 0; + cp.handle = instance; cp.length = len; cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; @@ -1591,7 +1591,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance) hdev->adv_data_len = len; cp.length = len; - cp.handle = 0; + cp.handle = instance; cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; @@ -1876,7 +1876,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - cp.handle = 0; + cp.handle = instance; bacpy(&cp.bdaddr, &random_addr); hci_req_add(req, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 9c4a093f8960..caf38a8ea6a8 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1579,11 +1579,13 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, } } - no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); - if (no_hdev != !hdev) { - err = mgmt_cmd_status(sk, index, opcode, - MGMT_STATUS_INVALID_INDEX); - goto done; + if (!(handler->flags & HCI_MGMT_HDEV_OPTIONAL)) { + no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); + if (no_hdev != !hdev) { + err = mgmt_cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } } var_len = (handler->flags & HCI_MGMT_VAR_LEN); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fd9d0d08f9c9..fe913a5c754a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5927,7 +5927,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, if (!enable_ecred) return -EINVAL; - if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } @@ -5964,7 +5964,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, } result = L2CAP_CR_LE_SUCCESS; - cmd_len -= sizeof(req); + cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); for (i = 0; i < num_scid; i++) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1cea42ee1e92..a995d2c51fa7 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1271,14 +1271,21 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) struct l2cap_conn *conn; int err = 0; - BT_DBG("sock %p, sk %p", sock, sk); + BT_DBG("sock %p, sk %p, how %d", sock, sk, how); + + /* 'how' parameter is mapped to sk_shutdown as follows: + * SHUT_RD (0) --> RCV_SHUTDOWN (1) + * SHUT_WR (1) --> SEND_SHUTDOWN (2) + * SHUT_RDWR (2) --> SHUTDOWN_MASK (3) + */ + how++; if (!sk) return 0; lock_sock(sk); - if (sk->sk_shutdown) + if ((sk->sk_shutdown & how) == how) goto shutdown_already; BT_DBG("Handling sock shutdown"); @@ -1301,11 +1308,20 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) * has already been actioned to close the L2CAP * link such as by l2cap_disconnection_req(). */ - if (sk->sk_shutdown) - goto has_shutdown; + if ((sk->sk_shutdown & how) == how) + goto shutdown_matched; } - sk->sk_shutdown = SHUTDOWN_MASK; + /* Try setting the RCV_SHUTDOWN bit, return early if SEND_SHUTDOWN + * is already set + */ + if ((how & RCV_SHUTDOWN) && !(sk->sk_shutdown & RCV_SHUTDOWN)) { + sk->sk_shutdown |= RCV_SHUTDOWN; + if ((sk->sk_shutdown & how) == how) + goto shutdown_matched; + } + + sk->sk_shutdown |= SEND_SHUTDOWN; release_sock(sk); l2cap_chan_lock(chan); @@ -1335,7 +1351,7 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); -has_shutdown: +shutdown_matched: l2cap_chan_put(chan); sock_put(sk); @@ -1363,7 +1379,7 @@ static int l2cap_sock_release(struct socket *sock) bt_sock_unlink(&l2cap_sk_list, sk); - err = l2cap_sock_shutdown(sock, 2); + err = l2cap_sock_shutdown(sock, SHUT_RDWR); chan = l2cap_pi(sk)->chan; l2cap_chan_hold(chan); diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index c09e0a3a0ed9..5326f41a58b7 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -183,6 +183,39 @@ void bt_err(const char *format, ...) } EXPORT_SYMBOL(bt_err); +#ifdef CONFIG_BT_FEATURE_DEBUG +static bool debug_enable; + +void bt_dbg_set(bool enable) +{ + debug_enable = enable; +} + +bool bt_dbg_get(void) +{ + return debug_enable; +} + +void bt_dbg(const char *format, ...) +{ + struct va_format vaf; + va_list args; + + if (likely(!debug_enable)) + return; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + printk(KERN_DEBUG pr_fmt("%pV"), &vaf); + + va_end(args); +} +EXPORT_SYMBOL(bt_dbg); +#endif + void bt_warn_ratelimited(const char *format, ...) { struct va_format vaf; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f8c0a4fc8090..9e8a3cccc6ca 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -109,6 +109,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_BLOCKED_KEYS, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_OP_READ_SECURITY_INFO, + MGMT_OP_READ_EXP_FEATURES_INFO, + MGMT_OP_SET_EXP_FEATURE, }; static const u16 mgmt_events[] = { @@ -147,6 +149,8 @@ static const u16 mgmt_events[] = { MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, MGMT_EV_EXT_INFO_CHANGED, + MGMT_EV_PHY_CONFIGURATION_CHANGED, + MGMT_EV_EXP_FEATURE_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -157,6 +161,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_EXT_INDEX_LIST, MGMT_OP_READ_EXT_INFO, MGMT_OP_READ_SECURITY_INFO, + MGMT_OP_READ_EXP_FEATURES_INFO, }; static const u16 mgmt_untrusted_events[] = { @@ -171,6 +176,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, MGMT_EV_EXT_INFO_CHANGED, + MGMT_EV_EXP_FEATURE_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -293,7 +299,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_rp_read_version rp; - BT_DBG("sock %p", sk); + bt_dev_dbg(hdev, "sock %p", sk); mgmt_fill_version_info(&rp); @@ -309,7 +315,7 @@ static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data, size_t rp_size; int i, err; - BT_DBG("sock %p", sk); + bt_dev_dbg(hdev, "sock %p", sk); if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) { num_commands = ARRAY_SIZE(mgmt_commands); @@ -362,7 +368,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 count; int err; - BT_DBG("sock %p", sk); + bt_dev_dbg(hdev, "sock %p", sk); read_lock(&hci_dev_list_lock); @@ -396,7 +402,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (d->dev_type == HCI_PRIMARY && !hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); - BT_DBG("Added hci%u", d->id); + bt_dev_dbg(hdev, "Added hci%u", d->id); } } @@ -422,7 +428,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, u16 count; int err; - BT_DBG("sock %p", sk); + bt_dev_dbg(hdev, "sock %p", sk); read_lock(&hci_dev_list_lock); @@ -456,7 +462,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, if (d->dev_type == HCI_PRIMARY && hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); - BT_DBG("Added hci%u", d->id); + bt_dev_dbg(hdev, "Added hci%u", d->id); } } @@ -481,7 +487,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, u16 count; int err; - BT_DBG("sock %p", sk); + bt_dev_dbg(hdev, "sock %p", sk); read_lock(&hci_dev_list_lock); @@ -523,7 +529,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, rp->entry[count].bus = d->bus; rp->entry[count++].index = cpu_to_le16(d->id); - BT_DBG("Added hci%u", d->id); + bt_dev_dbg(hdev, "Added hci%u", d->id); } rp->num_controllers = cpu_to_le16(count); @@ -599,7 +605,7 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev, struct mgmt_rp_read_config_info rp; u32 options = 0; - BT_DBG("sock %p %s", sk, hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -939,7 +945,7 @@ static void rpa_expired(struct work_struct *work) rpa_expired.work); struct hci_request req; - BT_DBG(""); + bt_dev_dbg(hdev, ""); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); @@ -979,7 +985,7 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, { struct mgmt_rp_read_info rp; - BT_DBG("sock %p %s", sk, hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -1035,7 +1041,7 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, struct mgmt_rp_read_ext_info *rp = (void *)buf; u16 eir_len; - BT_DBG("sock %p %s", sk, hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); memset(&buf, 0, sizeof(buf)); @@ -1094,7 +1100,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - BT_DBG("%s status 0x%02x", hdev->name, status); + bt_dev_dbg(hdev, "status 0x%02x", status); if (hci_conn_count(hdev) == 0) { cancel_delayed_work(&hdev->power_off); @@ -1170,7 +1176,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_pending_cmd *cmd; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, @@ -1311,7 +1317,7 @@ void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); @@ -1350,7 +1356,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, u16 timeout; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) @@ -1476,7 +1482,7 @@ void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); @@ -1536,7 +1542,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_pending_cmd *cmd; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) && !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) @@ -1593,7 +1599,7 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data, bool changed; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (cp->val != 0x00 && cp->val != 0x01) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, @@ -1637,7 +1643,7 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, u8 val, status; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_bredr_support(hdev); if (status) @@ -1705,7 +1711,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) u8 status; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_bredr_support(hdev); if (status) @@ -1786,7 +1792,7 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) u8 status; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_bredr_support(hdev); if (status) @@ -1892,7 +1898,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) int err; u8 val, enabled; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE, @@ -2053,7 +2059,7 @@ unlock: static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status); } @@ -2066,7 +2072,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) struct bt_uuid *uuid; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -2132,7 +2138,7 @@ static bool enable_service_cache(struct hci_dev *hdev) static void remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status); } @@ -2147,7 +2153,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_request req; int err, found; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -2218,7 +2224,7 @@ unlock: static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status); } @@ -2231,7 +2237,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_request req; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_bredr_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, @@ -2304,7 +2310,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, bool changed; int i; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_bredr_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, @@ -2330,8 +2336,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); - BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys, - key_count); + bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys, + key_count); for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; @@ -2532,7 +2538,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); @@ -2616,7 +2622,7 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, int err; u16 i; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -2692,7 +2698,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_pending_cmd *cmd; int err; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -2750,7 +2756,7 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_io_capability *cp = data; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, @@ -2760,8 +2766,7 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, hdev->io_capability = cp->io_capability; - BT_DBG("%s IO capability set to 0x%02x", hdev->name, - hdev->io_capability); + bt_dev_dbg(hdev, "IO capability set to 0x%02x", hdev->io_capability); hci_dev_unlock(hdev); @@ -2873,7 +2878,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); @@ -3002,7 +3007,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -3113,7 +3118,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_pin_code_neg_reply *cp = data; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_PIN_CODE_NEG_REPLY, @@ -3125,7 +3130,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_user_confirm_reply *cp = data; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); if (len != sizeof(*cp)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, @@ -3141,7 +3146,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_user_confirm_neg_reply *cp = data; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_CONFIRM_NEG_REPLY, @@ -3153,7 +3158,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_user_passkey_reply *cp = data; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_REPLY, @@ -3165,7 +3170,7 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_user_passkey_neg_reply *cp = data; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); return user_pairing_resp(sk, hdev, &cp->addr, MGMT_OP_USER_PASSKEY_NEG_REPLY, @@ -3206,7 +3211,7 @@ static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct mgmt_cp_set_local_name *cp; struct mgmt_pending_cmd *cmd; - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); @@ -3241,7 +3246,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_request req; int err; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -3310,7 +3315,7 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, u16 appearance; int err; - BT_DBG(""); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE, @@ -3342,7 +3347,7 @@ static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev, { struct mgmt_rp_get_phy_confguration rp; - BT_DBG("sock %p %s", sk, hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -3375,7 +3380,7 @@ static void set_default_phy_complete(struct hci_dev *hdev, u8 status, { struct mgmt_pending_cmd *cmd; - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); @@ -3413,7 +3418,7 @@ static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev, bool changed = false; int err; - BT_DBG("sock %p %s", sk, hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); configurable_phys = get_configurable_phys(hdev); supported_phys = get_supported_phys(hdev); @@ -3566,7 +3571,7 @@ static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 key_count, expected_len; int i; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); key_count = __le16_to_cpu(keys->key_count); if (key_count > max_key_count) { @@ -3612,7 +3617,7 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, int err; bool changed = false; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) return mgmt_cmd_status(sk, hdev->id, @@ -3710,6 +3715,140 @@ static int read_security_info(struct sock *sk, struct hci_dev *hdev, rp, sizeof(*rp) + sec_len); } +#ifdef CONFIG_BT_FEATURE_DEBUG +/* d4992530-b9ec-469f-ab01-6c481c47da1c */ +static const u8 debug_uuid[16] = { + 0x1c, 0xda, 0x47, 0x1c, 0x48, 0x6c, 0x01, 0xab, + 0x9f, 0x46, 0xec, 0xb9, 0x30, 0x25, 0x99, 0xd4, +}; +#endif + +static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + char buf[42]; + struct mgmt_rp_read_exp_features_info *rp = (void *)buf; + u16 idx = 0; + + bt_dev_dbg(hdev, "sock %p", sk); + + memset(&buf, 0, sizeof(buf)); + +#ifdef CONFIG_BT_FEATURE_DEBUG + if (!hdev) { + u32 flags = bt_dbg_get() ? BIT(0) : 0; + + memcpy(rp->features[idx].uuid, debug_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } +#endif + + rp->feature_count = cpu_to_le16(idx); + + /* After reading the experimental features information, enable + * the events to update client on any future change. + */ + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, + MGMT_OP_READ_EXP_FEATURES_INFO, + 0, rp, sizeof(*rp) + (20 * idx)); +} + +#ifdef CONFIG_BT_FEATURE_DEBUG +static int exp_debug_feature_changed(bool enabled, struct sock *skip) +{ + struct mgmt_ev_exp_feature_changed ev; + + memset(&ev, 0, sizeof(ev)); + memcpy(ev.uuid, debug_uuid, 16); + ev.flags = cpu_to_le32(enabled ? BIT(0) : 0); + + return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL, + &ev, sizeof(ev), + HCI_MGMT_EXP_FEATURE_EVENTS, skip); +} +#endif + +static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_set_exp_feature *cp = data; + struct mgmt_rp_set_exp_feature rp; + + bt_dev_dbg(hdev, "sock %p", sk); + + if (!memcmp(cp->uuid, ZERO_KEY, 16)) { + memset(rp.uuid, 0, 16); + rp.flags = cpu_to_le32(0); + +#ifdef CONFIG_BT_FEATURE_DEBUG + if (!hdev) { + bool changed = bt_dbg_get(); + + bt_dbg_set(false); + + if (changed) + exp_debug_feature_changed(false, sk); + } +#endif + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + } + +#ifdef CONFIG_BT_FEATURE_DEBUG + if (!memcmp(cp->uuid, debug_uuid, 16)) { + bool val, changed; + int err; + + /* Command requires to use the non-controller index */ + if (hdev) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + val = !!cp->param[0]; + changed = val ? !bt_dbg_get() : bt_dbg_get(); + bt_dbg_set(val); + + memcpy(rp.uuid, debug_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_debug_feature_changed(val, sk); + + return err; + } +#endif + + return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_NOT_SUPPORTED); +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -3717,7 +3856,7 @@ static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, size_t rp_size = sizeof(mgmt_rp); struct mgmt_pending_cmd *cmd; - BT_DBG("%s status %u", hdev->name, status); + bt_dev_dbg(hdev, "status %u", status); cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); if (!cmd) @@ -3776,7 +3915,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, struct hci_request req; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -3826,7 +3965,7 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, struct mgmt_addr_info *addr = data; int err; - BT_DBG("%s ", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(addr->type)) return mgmt_cmd_complete(sk, hdev->id, @@ -3935,7 +4074,7 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, u8 status; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (cp->addr.type != BDADDR_BREDR) return mgmt_cmd_complete(sk, hdev->id, @@ -3969,7 +4108,7 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - BT_DBG("status %d", status); + bt_dev_dbg(hdev, "status %d", status); hci_dev_lock(hdev); @@ -4030,7 +4169,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, u8 status; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -4122,7 +4261,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, u8 status; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -4217,7 +4356,7 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - BT_DBG("status %d", status); + bt_dev_dbg(hdev, "status %d", status); hci_dev_lock(hdev); @@ -4243,7 +4382,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_pending_cmd *cmd; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -4285,7 +4424,7 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data, struct inquiry_entry *e; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -4327,7 +4466,7 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, u8 status; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, @@ -4363,7 +4502,7 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, u8 status; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(cp->addr.type)) return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, @@ -4400,7 +4539,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, int err; __u16 source; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); source = __le16_to_cpu(cp->source); @@ -4430,7 +4569,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, static void enable_advertising_instance(struct hci_dev *hdev, u8 status, u16 opcode) { - BT_DBG("status %d", status); + bt_dev_dbg(hdev, "status %d", status); } static void set_advertising_complete(struct hci_dev *hdev, u8 status, @@ -4516,7 +4655,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u8 val, status; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_le_support(hdev); if (status) @@ -4625,7 +4764,7 @@ static int set_static_address(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_static_address *cp = data; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, @@ -4670,7 +4809,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, __u16 interval, window; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, @@ -4725,7 +4864,7 @@ static void fast_connectable_complete(struct hci_dev *hdev, u8 status, { struct mgmt_pending_cmd *cmd; - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); @@ -4762,7 +4901,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, struct hci_request req; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) || hdev->hci_ver < BLUETOOTH_VER_1_2) @@ -4823,7 +4962,7 @@ static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_pending_cmd *cmd; - BT_DBG("status 0x%02x", status); + bt_dev_dbg(hdev, "status 0x%02x", status); hci_dev_lock(hdev); @@ -4858,7 +4997,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) struct hci_request req; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_bredr_capable(hdev) || !lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, @@ -4968,7 +5107,7 @@ static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; - BT_DBG("%s status %u", hdev->name, status); + bt_dev_dbg(hdev, "status %u", status); hci_dev_lock(hdev); @@ -5017,7 +5156,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, u8 val; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_sc_capable(hdev) && !hci_dev_test_flag(hdev, HCI_LE_ENABLED)) @@ -5103,7 +5242,7 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev, bool changed, use_changed; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, @@ -5150,7 +5289,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data, bool changed; int err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, @@ -5225,7 +5364,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 irk_count, expected_len; int i, err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, @@ -5247,7 +5386,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, MGMT_STATUS_INVALID_PARAMS); } - BT_DBG("%s irk_count %u", hdev->name, irk_count); + bt_dev_dbg(hdev, "irk_count %u", irk_count); for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *key = &cp->irks[i]; @@ -5315,7 +5454,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, u16 key_count, expected_len; int i, err; - BT_DBG("request for %s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, @@ -5337,7 +5476,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS); } - BT_DBG("%s key_count %u", hdev->name, key_count); + bt_dev_dbg(hdev, "key_count %u", key_count); for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; @@ -5438,7 +5577,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, u16 handle; u8 status; - BT_DBG("status 0x%02x", hci_status); + bt_dev_dbg(hdev, "status 0x%02x", hci_status); hci_dev_lock(hdev); @@ -5492,7 +5631,7 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, unsigned long conn_info_age; int err = 0; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); @@ -5646,7 +5785,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct mgmt_pending_cmd *cmd; struct hci_conn *conn; - BT_DBG("%s status %u", hdev->name, status); + bt_dev_dbg(hdev, "status %u", status); hci_dev_lock(hdev); @@ -5683,7 +5822,7 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, struct hci_conn *conn; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); memset(&rp, 0, sizeof(rp)); bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); @@ -5804,8 +5943,8 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, params->auto_connect = auto_connect; - BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, - auto_connect); + bt_dev_dbg(hdev, "addr %pMR (type %u) auto_connect %u", + addr, addr_type, auto_connect); return 0; } @@ -5829,7 +5968,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, u8 auto_conn, addr_type; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!bdaddr_type_is_valid(cp->addr.type) || !bacmp(&cp->addr.bdaddr, BDADDR_ANY)) @@ -5927,7 +6066,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_remove_device *cp = data; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -6036,7 +6175,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(p); } - BT_DBG("All LE connection parameters were removed"); + bt_dev_dbg(hdev, "All LE connection parameters were removed"); hci_update_background_scan(hdev); } @@ -6079,7 +6218,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } - BT_DBG("%s param_count %u", hdev->name, param_count); + bt_dev_dbg(hdev, "param_count %u", param_count); hci_dev_lock(hdev); @@ -6091,8 +6230,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, u16 min, max, latency, timeout; u8 addr_type; - BT_DBG("Adding %pMR (type %u)", ¶m->addr.bdaddr, - param->addr.type); + bt_dev_dbg(hdev, "Adding %pMR (type %u)", ¶m->addr.bdaddr, + param->addr.type); if (param->addr.type == BDADDR_LE_PUBLIC) { addr_type = ADDR_LE_DEV_PUBLIC; @@ -6108,8 +6247,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, latency = le16_to_cpu(param->latency); timeout = le16_to_cpu(param->timeout); - BT_DBG("min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x", - min, max, latency, timeout); + bt_dev_dbg(hdev, "min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x", + min, max, latency, timeout); if (hci_check_conn_params(min, max, latency, timeout) < 0) { bt_dev_err(hdev, "ignoring invalid connection parameters"); @@ -6142,7 +6281,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, bool changed; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, @@ -6198,7 +6337,7 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev, bool changed; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (hdev_is_powered(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, @@ -6253,7 +6392,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, u16 eir_len; int err; - BT_DBG("%s status %u", hdev->name, status); + bt_dev_dbg(hdev, "status %u", status); cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev); if (!cmd) @@ -6392,7 +6531,7 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, u8 status, flags, role, addr[7], hash[16], rand[16]; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (hdev_is_powered(hdev)) { switch (cp->type) { @@ -6579,7 +6718,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, u32 supported_flags; u8 *instance; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, @@ -6722,7 +6861,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, struct adv_info *adv_instance, *n; u8 instance; - BT_DBG("status %d", status); + bt_dev_dbg(hdev, "status %d", status); hci_dev_lock(hdev); @@ -6781,7 +6920,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, struct mgmt_pending_cmd *cmd; struct hci_request req; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); status = mgmt_le_support(hdev); if (status) @@ -6918,7 +7057,7 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status, struct mgmt_cp_remove_advertising *cp; struct mgmt_rp_remove_advertising rp; - BT_DBG("status %d", status); + bt_dev_dbg(hdev, "status %d", status); hci_dev_lock(hdev); @@ -6950,7 +7089,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, struct hci_request req; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); hci_dev_lock(hdev); @@ -7022,7 +7161,7 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, u32 flags, supported_flags; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "sock %p", sk); if (!lmp_le_capable(hdev)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, @@ -7152,6 +7291,12 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { set_wideband_speech, MGMT_SETTING_SIZE }, { read_security_info, MGMT_READ_SECURITY_INFO_SIZE, HCI_MGMT_UNTRUSTED }, + { read_exp_features_info, MGMT_READ_EXP_FEATURES_INFO_SIZE, + HCI_MGMT_UNTRUSTED | + HCI_MGMT_HDEV_OPTIONAL }, + { set_exp_feature, MGMT_SET_EXP_FEATURE_SIZE, + HCI_MGMT_VAR_LEN | + HCI_MGMT_HDEV_OPTIONAL }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -7250,7 +7395,7 @@ void mgmt_power_on(struct hci_dev *hdev, int err) { struct cmd_lookup match = { NULL, hdev }; - BT_DBG("err %d", err); + bt_dev_dbg(hdev, "err %d", err); hci_dev_lock(hdev); @@ -7669,7 +7814,7 @@ int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct mgmt_ev_user_confirm_request ev; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); @@ -7685,7 +7830,7 @@ int mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct mgmt_ev_user_passkey_request ev; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); @@ -7746,7 +7891,7 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct mgmt_ev_passkey_notify ev; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); @@ -8165,7 +8310,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) { struct mgmt_ev_discovering ev; - BT_DBG("%s discovering %u", hdev->name, discovering); + bt_dev_dbg(hdev, "discovering %u", discovering); memset(&ev, 0, sizeof(ev)); ev.type = hdev->discovery.type; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index df22cbf94693..5510017cf9ff 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -508,7 +508,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], if (!chan || !chan->data) return false; - BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk); + bt_dev_dbg(hdev, "RPA %pMR IRK %*phN", bdaddr, 16, irk); err = smp_ah(irk, &bdaddr->b[3], hash); if (err) @@ -534,7 +534,7 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa) if (err < 0) return err; - BT_DBG("RPA %pMR", rpa); + bt_dev_dbg(hdev, "RPA %pMR", rpa); return 0; } @@ -551,7 +551,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) smp = chan->data; if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { - BT_DBG("Using debug keys"); + bt_dev_dbg(hdev, "Using debug keys"); err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk); if (err) return err; @@ -1867,7 +1867,7 @@ static u8 sc_send_public_key(struct smp_chan *smp) { struct hci_dev *hdev = smp->conn->hcon->hdev; - BT_DBG(""); + bt_dev_dbg(hdev, ""); if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) { struct l2cap_chan *chan = hdev->smp_data; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ca685c0cdf95..a0e9a7937412 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -563,18 +563,32 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, unsigned br_hr, dev_hr; bool changed_addr; - /* Don't allow bridging non-ethernet like devices, or DSA-enabled - * master network devices since the bridge layer rx_handler prevents - * the DSA fake ethertype handler to be invoked, so we do not strip off - * the DSA switch tag protocol header and the bridge layer just return - * RX_HANDLER_CONSUMED, stopping RX processing for these frames. - */ + /* Don't allow bridging non-ethernet like devices. */ if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || - !is_valid_ether_addr(dev->dev_addr) || - netdev_uses_dsa(dev)) + !is_valid_ether_addr(dev->dev_addr)) return -EINVAL; + /* Also don't allow bridging of net devices that are DSA masters, since + * the bridge layer rx_handler prevents the DSA fake ethertype handler + * to be invoked, so we don't get the chance to strip off and parse the + * DSA switch tag protocol header (the bridge layer just returns + * RX_HANDLER_CONSUMED, stopping RX processing for these frames). + * The only case where that would not be an issue is when bridging can + * already be offloaded, such as when the DSA master is itself a DSA + * or plain switchdev port, and is bridged only with other ports from + * the same hardware device. + */ + if (netdev_uses_dsa(dev)) { + list_for_each_entry(p, &br->port_list, list) { + if (!netdev_port_same_parent_id(dev, p->dev)) { + NL_SET_ERR_MSG(extack, + "Cannot do software bridging with a DSA master"); + return -EINVAL; + } + } + } + /* No bridging of bridges */ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) { NL_SET_ERR_MSG(extack, @@ -618,7 +632,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, if (err) goto err3; - err = netdev_rx_handler_register(dev, br_handle_frame, p); + err = netdev_rx_handler_register(dev, br_get_rx_handler(dev), p); if (err) goto err4; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d5c34f36f0f4..59a318b9f646 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -17,6 +17,7 @@ #endif #include <linux/neighbour.h> #include <net/arp.h> +#include <net/dsa.h> #include <linux/export.h> #include <linux/rculist.h> #include "br_private.h" @@ -257,7 +258,7 @@ frame_finish: * Return NULL if skb is handled * note: already called with rcu_read_lock */ -rx_handler_result_t br_handle_frame(struct sk_buff **pskb) +static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { struct net_bridge_port *p; struct sk_buff *skb = *pskb; @@ -359,3 +360,23 @@ drop: } return RX_HANDLER_CONSUMED; } + +/* This function has no purpose other than to appease the br_port_get_rcu/rtnl + * helpers which identify bridged ports according to the rx_handler installed + * on them (so there _needs_ to be a bridge rx_handler even if we don't need it + * to do anything useful). This bridge won't support traffic to/from the stack, + * but only hardware bridging. So return RX_HANDLER_PASS so we don't steal + * frames from the ETH_P_XDSA packet_type handler. + */ +static rx_handler_result_t br_handle_frame_dummy(struct sk_buff **pskb) +{ + return RX_HANDLER_PASS; +} + +rx_handler_func_t *br_get_rx_handler(const struct net_device *dev) +{ + if (netdev_uses_dsa(dev)) + return br_handle_frame_dummy; + + return br_handle_frame; +} diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index 503896638be0..397e7f710772 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -28,7 +28,7 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, int err; if (br->stp_enabled != BR_NO_STP) { - NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled\n"); + NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled"); return -EINVAL; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a774e19c41bb..240e260e3461 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -615,6 +615,7 @@ int br_process_vlan_info(struct net_bridge *br, v - 1, rtm_cmd); v_change_start = 0; } + cond_resched(); } /* v_change_start is set only if the last/whole range changed */ if (v_change_start) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c35647cb138a..7501be4eeba0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -598,10 +598,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); static inline void br_netpoll_send_skb(const struct net_bridge_port *p, struct sk_buff *skb) { - struct netpoll *np = p->np; - - if (np) - netpoll_send_skb(np, skb); + netpoll_send_skb(p->np, skb); } int br_netpoll_enable(struct net_bridge_port *p); @@ -705,16 +702,16 @@ int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev); /* br_input.c */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); -rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +rx_handler_func_t *br_get_rx_handler(const struct net_device *dev); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) { - return rcu_dereference(dev->rx_handler) == br_handle_frame; + return rcu_dereference(dev->rx_handler) == br_get_rx_handler(dev); } static inline bool br_rx_handler_check_rtnl(const struct net_device *dev) { - return rcu_dereference_rtnl(dev->rx_handler) == br_handle_frame; + return rcu_dereference_rtnl(dev->rx_handler) == br_get_rx_handler(dev); } static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) @@ -1331,7 +1328,7 @@ static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) static inline bool br_mrp_enabled(struct net_bridge *br) { - return 0; + return false; } static inline void br_mrp_port_del(struct net_bridge *br, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a42850b7eb9a..ba55851fe132 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -203,7 +203,7 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val, if (br_mrp_enabled(br)) { NL_SET_ERR_MSG_MOD(extack, - "STP can't be enabled if MRP is already enabled\n"); + "STP can't be enabled if MRP is already enabled"); return -EINVAL; } diff --git a/net/compat.c b/net/compat.c index 4bed96e84d9a..69fc6d1e4e6e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -56,7 +56,8 @@ int __get_compat_msghdr(struct msghdr *kmsg, if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); - kmsg->msg_control = compat_ptr(msg.msg_control); + kmsg->msg_control_is_user = true; + kmsg->msg_control_user = compat_ptr(msg.msg_control); kmsg->msg_controllen = msg.msg_controllen; if (save_addr) @@ -121,7 +122,7 @@ int get_compat_msghdr(struct msghdr *kmsg, ((ucmlen) >= sizeof(struct compat_cmsghdr) && \ (ucmlen) <= (unsigned long) \ ((mhdr)->msg_controllen - \ - ((char *)(ucmsg) - (char *)(mhdr)->msg_control))) + ((char __user *)(ucmsg) - (char __user *)(mhdr)->msg_control_user))) static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg, struct compat_cmsghdr __user *cmsg, int cmsg_len) diff --git a/net/core/dev.c b/net/core/dev.c index afff16849c26..4c91de39890a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -398,6 +398,74 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); +#ifdef CONFIG_LOCKDEP +/* + * register_netdevice() inits txq->_xmit_lock and sets lockdep class + * according to dev->type + */ +static const unsigned short netdev_lock_type[] = { + ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, + ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, + ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, + ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, + ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, + ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, + ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, + ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, + ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, + ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, + ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, + ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, + ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, + ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, + ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; + +static const char *const netdev_lock_name[] = { + "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", + "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", + "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", + "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", + "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; + +static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; + +static inline unsigned short netdev_lock_pos(unsigned short dev_type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) + if (netdev_lock_type[i] == dev_type) + return i; + /* the last key is used by default */ + return ARRAY_SIZE(netdev_lock_type) - 1; +} + +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ + int i; + + i = netdev_lock_pos(dev_type); + lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], + netdev_lock_name[i]); +} +#else +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +#endif + /******************************************************************************* * * Protocol management and registration routines @@ -7793,6 +7861,28 @@ void netdev_bonding_info_change(struct net_device *dev, } EXPORT_SYMBOL(netdev_bonding_info_change); +/** + * netdev_get_xmit_slave - Get the xmit slave of master device + * @skb: The packet + * @all_slaves: assume all the slaves are active + * + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + * %NULL is returned if no slave is found. + */ + +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_xmit_slave) + return NULL; + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); +} +EXPORT_SYMBOL(netdev_get_xmit_slave); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -9208,7 +9298,7 @@ static void netdev_init_one_queue(struct net_device *dev, { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); - lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; @@ -9255,22 +9345,6 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); -static void netdev_register_lockdep_key(struct net_device *dev) -{ - lockdep_register_key(&dev->qdisc_tx_busylock_key); - lockdep_register_key(&dev->qdisc_running_key); - lockdep_register_key(&dev->qdisc_xmit_lock_key); - lockdep_register_key(&dev->addr_list_lock_key); -} - -static void netdev_unregister_lockdep_key(struct net_device *dev) -{ - lockdep_unregister_key(&dev->qdisc_tx_busylock_key); - lockdep_unregister_key(&dev->qdisc_running_key); - lockdep_unregister_key(&dev->qdisc_xmit_lock_key); - lockdep_unregister_key(&dev->addr_list_lock_key); -} - void netdev_update_lockdep_key(struct net_device *dev) { lockdep_unregister_key(&dev->addr_list_lock_key); @@ -9837,7 +9911,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - netdev_register_lockdep_key(dev); + lockdep_register_key(&dev->addr_list_lock_key); dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; @@ -9926,7 +10000,7 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - netdev_unregister_lockdep_key(dev); + lockdep_unregister_key(&dev->addr_list_lock_key); /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { diff --git a/net/core/devlink.c b/net/core/devlink.c index 80f97722f31f..7b76e5fffc10 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3716,24 +3716,26 @@ nla_put_failure: return err; } -static void devlink_nl_region_notify(struct devlink_region *region, - struct devlink_snapshot *snapshot, - enum devlink_command cmd) +static struct sk_buff * +devlink_nl_region_notify_build(struct devlink_region *region, + struct devlink_snapshot *snapshot, + enum devlink_command cmd, u32 portid, u32 seq) { struct devlink *devlink = region->devlink; struct sk_buff *msg; void *hdr; int err; - WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) - return; + return ERR_PTR(-ENOMEM); - hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd); - if (!hdr) + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd); + if (!hdr) { + err = -EMSGSIZE; goto out_free_msg; + } err = devlink_nl_put_handle(msg, devlink); if (err) @@ -3757,15 +3759,30 @@ static void devlink_nl_region_notify(struct devlink_region *region, } genlmsg_end(msg, hdr); - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); - - return; + return msg; out_cancel_msg: genlmsg_cancel(msg, hdr); out_free_msg: nlmsg_free(msg); + return ERR_PTR(err); +} + +static void devlink_nl_region_notify(struct devlink_region *region, + struct devlink_snapshot *snapshot, + enum devlink_command cmd) +{ + struct devlink *devlink = region->devlink; + struct sk_buff *msg; + + WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); + + msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); + if (IS_ERR(msg)) + return; + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } /** @@ -4069,6 +4086,8 @@ static int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct devlink_snapshot *snapshot; + struct nlattr *snapshot_id_attr; struct devlink_region *region; const char *region_name; u32 snapshot_id; @@ -4080,11 +4099,6 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - if (!info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { - NL_SET_ERR_MSG_MOD(info->extack, "No snapshot id provided"); - return -EINVAL; - } - region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]); region = devlink_region_get_by_name(devlink, region_name); if (!region) { @@ -4102,16 +4116,25 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) return -ENOSPC; } - snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]); + snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; + if (snapshot_id_attr) { + snapshot_id = nla_get_u32(snapshot_id_attr); - if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { - NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use"); - return -EEXIST; - } + if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { + NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use"); + return -EEXIST; + } - err = __devlink_snapshot_id_insert(devlink, snapshot_id); - if (err) - return err; + err = __devlink_snapshot_id_insert(devlink, snapshot_id); + if (err) + return err; + } else { + err = __devlink_region_snapshot_id_get(devlink, &snapshot_id); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id"); + return err; + } + } err = region->ops->snapshot(devlink, info->extack, &data); if (err) @@ -4121,6 +4144,27 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) if (err) goto err_snapshot_create; + if (!snapshot_id_attr) { + struct sk_buff *msg; + + snapshot = devlink_region_snapshot_get_by_id(region, + snapshot_id); + if (WARN_ON(!snapshot)) + return -EINVAL; + + msg = devlink_nl_region_notify_build(region, snapshot, + DEVLINK_CMD_REGION_NEW, + info->snd_portid, + info->snd_seq); + err = PTR_ERR_OR_ZERO(msg); + if (err) + goto err_notify; + + err = genlmsg_reply(msg, info); + if (err) + goto err_notify; + } + return 0; err_snapshot_create: @@ -4128,6 +4172,10 @@ err_snapshot_create: err_snapshot_capture: __devlink_snapshot_id_decrement(devlink, snapshot_id); return err; + +err_notify: + devlink_region_snapshot_del(region, snapshot); + return err; } static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, @@ -4167,7 +4215,6 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, struct nlattr **attrs, u64 start_offset, u64 end_offset, - bool dump, u64 *new_offset) { struct devlink_snapshot *snapshot; @@ -4182,9 +4229,6 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, if (!snapshot) return -EINVAL; - if (end_offset > region->size || dump) - end_offset = region->size; - while (curr_offset < end_offset) { u32 data_size; u8 *data; @@ -4212,13 +4256,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); - u64 ret_offset, start_offset, end_offset = 0; + u64 ret_offset, start_offset, end_offset = U64_MAX; struct nlattr **attrs = info->attrs; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; - bool dump = true; void *hdr; int err; @@ -4246,8 +4289,21 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_unlock; } + if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && + attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { + if (!start_offset) + start_offset = + nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); + + end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); + end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]); + } + + if (end_offset > region->size) + end_offset = region->size; + /* return 0 if there is no further data to read */ - if (start_offset >= region->size) { + if (start_offset == end_offset) { err = 0; goto out_unlock; } @@ -4274,22 +4330,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; } - if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && - attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { - if (!start_offset) - start_offset = - nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); - - end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); - end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]); - dump = false; - } - err = devlink_nl_region_read_snapshot_fill(skb, devlink, region, attrs, start_offset, - end_offset, dump, - &ret_offset); + end_offset, &ret_offset); if (err && err != -EMSGSIZE) goto nla_put_failure; @@ -5363,6 +5407,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, { enum devlink_health_reporter_state prev_health_state; struct devlink *devlink = reporter->devlink; + unsigned long recover_ts_threshold; /* write a log message of the current error */ WARN_ON(!msg); @@ -5373,10 +5418,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter, devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); /* abort if the previous error wasn't recovered */ + recover_ts_threshold = reporter->last_recovery_ts + + msecs_to_jiffies(reporter->graceful_period); if (reporter->auto_recover && (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || - jiffies - reporter->last_recovery_ts < - msecs_to_jiffies(reporter->graceful_period))) { + (reporter->last_recovery_ts && reporter->recovery_count && + time_is_after_jiffies(recover_ts_threshold)))) { trace_devlink_health_recover_aborted(devlink, reporter->ops->name, reporter->health_state, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 8e33cec9fc4e..2ee7bc4c9e03 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -213,6 +213,7 @@ static void sched_send_work(struct timer_list *t) static void trace_drop_common(struct sk_buff *skb, void *location) { struct net_dm_alert_msg *msg; + struct net_dm_drop_point *point; struct nlmsghdr *nlh; struct nlattr *nla; int i; @@ -231,11 +232,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location) nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); + point = msg->points; for (i = 0; i < msg->entries; i++) { - if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { - msg->points[i].count++; + if (!memcmp(&location, &point->pc, sizeof(void *))) { + point->count++; goto out; } + point++; } if (msg->entries == dm_hit_limit) goto out; @@ -244,8 +247,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) */ __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); - memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); - msg->points[msg->entries].count = 1; + memcpy(point->pc, &location, sizeof(void *)); + point->count = 1; msg->entries++; if (!timer_pending(&data->send_timer)) { diff --git a/net/core/dst.c b/net/core/dst.c index 193af526e908..d6b6ced0d451 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -81,11 +81,11 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, { struct dst_entry *dst; - if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { + if (ops->gc && + !(flags & DST_NOCOUNT) && + dst_entries_get_fast(ops) > ops->gc_thresh) { if (ops->gc(ops)) { - printk_ratelimited(KERN_NOTICE "Route cache is full: " - "consider increasing sysctl " - "net.ipv[4|6].route.max_size.\n"); + pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n"); return NULL; } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3f2263e79e4b..b607ea602774 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1956,6 +1956,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, NEIGH_UPDATE_F_OVERRIDE_ISROUTER); } + if (protocol) + neigh->protocol = protocol; + if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; @@ -1969,9 +1972,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); - if (protocol) - neigh->protocol = protocol; - neigh_release(neigh); out: diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 15b366a1a958..093e90e52bc2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -305,20 +305,22 @@ static int netpoll_owner_active(struct net_device *dev) } /* call with IRQ disabled */ -void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, - struct net_device *dev) +static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; + struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; lockdep_assert_irqs_disabled(); - npinfo = rcu_dereference_bh(np->dev->npinfo); + dev = np->dev; + npinfo = rcu_dereference_bh(dev->npinfo); + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); - return; + return NET_XMIT_DROP; } /* don't get messages out of order, and no recursion */ @@ -357,8 +359,25 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } + return NETDEV_TX_OK; +} + +netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +{ + unsigned long flags; + netdev_tx_t ret; + + if (unlikely(!np)) { + dev_kfree_skb_irq(skb); + ret = NET_XMIT_DROP; + } else { + local_irq_save(flags); + ret = __netpoll_send_skb(np, skb); + local_irq_restore(flags); + } + return ret; } -EXPORT_SYMBOL(netpoll_send_skb_on_dev); +EXPORT_SYMBOL(netpoll_send_skb); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { diff --git a/net/core/scm.c b/net/core/scm.c index dc6fed1f221c..875df1c2989d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -212,16 +212,12 @@ EXPORT_SYMBOL(__scm_send); int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { - struct cmsghdr __user *cm - = (__force struct cmsghdr __user *)msg->msg_control; - struct cmsghdr cmhdr; int cmlen = CMSG_LEN(len); - int err; - if (MSG_CMSG_COMPAT & msg->msg_flags) + if (msg->msg_flags & MSG_CMSG_COMPAT) return put_cmsg_compat(msg, level, type, len, data); - if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { + if (!msg->msg_control || msg->msg_controllen < sizeof(struct cmsghdr)) { msg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } @@ -229,23 +225,30 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) msg->msg_flags |= MSG_CTRUNC; cmlen = msg->msg_controllen; } - cmhdr.cmsg_level = level; - cmhdr.cmsg_type = type; - cmhdr.cmsg_len = cmlen; - - err = -EFAULT; - if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) - goto out; - if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) - goto out; - cmlen = CMSG_SPACE(len); - if (msg->msg_controllen < cmlen) - cmlen = msg->msg_controllen; + + if (msg->msg_control_is_user) { + struct cmsghdr __user *cm = msg->msg_control_user; + struct cmsghdr cmhdr; + + cmhdr.cmsg_level = level; + cmhdr.cmsg_type = type; + cmhdr.cmsg_len = cmlen; + if (copy_to_user(cm, &cmhdr, sizeof cmhdr) || + copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm))) + return -EFAULT; + } else { + struct cmsghdr *cm = msg->msg_control; + + cm->cmsg_level = level; + cm->cmsg_type = type; + cm->cmsg_len = cmlen; + memcpy(CMSG_DATA(cm), data, cmlen - sizeof(*cm)); + } + + cmlen = min(CMSG_SPACE(len), msg->msg_controllen); msg->msg_control += cmlen; msg->msg_controllen -= cmlen; - err = 0; -out: - return err; + return 0; } EXPORT_SYMBOL(put_cmsg); @@ -277,78 +280,90 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter } EXPORT_SYMBOL(put_cmsg_scm_timestamping); +static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags) +{ + struct socket *sock; + int new_fd; + int error; + + error = security_file_receive(file); + if (error) + return error; + + new_fd = get_unused_fd_flags(o_flags); + if (new_fd < 0) + return new_fd; + + error = put_user(new_fd, ufd); + if (error) { + put_unused_fd(new_fd); + return error; + } + + /* Bump the usage count and install the file. */ + sock = sock_from_file(file, &error); + if (sock) { + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); + } + fd_install(new_fd, get_file(file)); + return 0; +} + +static int scm_max_fds(struct msghdr *msg) +{ + if (msg->msg_controllen <= sizeof(struct cmsghdr)) + return 0; + return (msg->msg_controllen - sizeof(struct cmsghdr)) / sizeof(int); +} + void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm = (__force struct cmsghdr __user*)msg->msg_control; - - int fdmax = 0; - int fdnum = scm->fp->count; - struct file **fp = scm->fp->fp; - int __user *cmfptr; + int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; + int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); + int __user *cmsg_data = CMSG_USER_DATA(cm); int err = 0, i; - if (MSG_CMSG_COMPAT & msg->msg_flags) { + if (msg->msg_flags & MSG_CMSG_COMPAT) { scm_detach_fds_compat(msg, scm); return; } - if (msg->msg_controllen > sizeof(struct cmsghdr)) - fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr)) - / sizeof(int)); - - if (fdnum < fdmax) - fdmax = fdnum; + /* no use for FD passing from kernel space callers */ + if (WARN_ON_ONCE(!msg->msg_control_is_user)) + return; - for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; - i++, cmfptr++) - { - struct socket *sock; - int new_fd; - err = security_file_receive(fp[i]); + for (i = 0; i < fdmax; i++) { + err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err) break; - err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags - ? O_CLOEXEC : 0); - if (err < 0) - break; - new_fd = err; - err = put_user(new_fd, cmfptr); - if (err) { - put_unused_fd(new_fd); - break; - } - /* Bump the usage count and install the file. */ - sock = sock_from_file(fp[i], &err); - if (sock) { - sock_update_netprioidx(&sock->sk->sk_cgrp_data); - sock_update_classid(&sock->sk->sk_cgrp_data); - } - fd_install(new_fd, get_file(fp[i])); } - if (i > 0) - { - int cmlen = CMSG_LEN(i*sizeof(int)); + if (i > 0) { + int cmlen = CMSG_LEN(i * sizeof(int)); + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { - cmlen = CMSG_SPACE(i*sizeof(int)); + cmlen = CMSG_SPACE(i * sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } } - if (i < fdnum || (fdnum && fdmax <= 0)) + + if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* - * All of the files that fit in the message have had their - * usage counts incremented, so we just free the list. + * All of the files that fit in the message have had their usage counts + * incremented, so we just free the list. */ __scm_destroy(scm); } diff --git a/net/core/sock.c b/net/core/sock.c index 90509c37d291..fd85e651ce28 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1152,23 +1152,31 @@ set_rcvbuf: break; case SO_TXTIME: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { - ret = -EPERM; - } else if (optlen != sizeof(struct sock_txtime)) { + if (optlen != sizeof(struct sock_txtime)) { ret = -EINVAL; + break; } else if (copy_from_user(&sk_txtime, optval, sizeof(struct sock_txtime))) { ret = -EFAULT; + break; } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) { ret = -EINVAL; - } else { - sock_valbool_flag(sk, SOCK_TXTIME, true); - sk->sk_clockid = sk_txtime.clockid; - sk->sk_txtime_deadline_mode = - !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE); - sk->sk_txtime_report_errors = - !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS); + break; + } + /* CLOCK_MONOTONIC is only used by sch_fq, and this packet + * scheduler has enough safe guards. + */ + if (sk_txtime.clockid != CLOCK_MONOTONIC && + !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + break; } + sock_valbool_flag(sk, SOCK_TXTIME, true); + sk->sk_clockid = sk_txtime.clockid; + sk->sk_txtime_deadline_mode = + !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE); + sk->sk_txtime_report_errors = + !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS); break; case SO_BINDTOIFINDEX: @@ -2364,7 +2372,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -/* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0384a911779e..1ce9ba8cf545 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -412,6 +412,15 @@ void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, } EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister); +struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) +{ + if (!netdev || !dsa_slave_dev_check(netdev)) + return ERR_PTR(-ENODEV); + + return dsa_slave_to_port(netdev); +} +EXPORT_SYMBOL_GPL(dsa_port_from_netdev); + static int __init dsa_init_module(void) { int rc; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9a271a58a41d..076908fdd29b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -24,6 +24,27 @@ LIST_HEAD(dsa_tree_list); static const struct devlink_ops dsa_devlink_ops = { }; +struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) +{ + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + list_for_each_entry(dst, &dsa_tree_list, list) { + if (dst->index != tree_index) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->index != sw_index) + continue; + + return dp->ds; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_switch_find); + static struct dsa_switch_tree *dsa_tree_find(int index) { struct dsa_switch_tree *dst; @@ -459,7 +480,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { err = dsa_port_setup(dp); if (err) - goto teardown; + continue; } return 0; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 6d9a1ef65fa0..adecf73bd608 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -35,6 +35,7 @@ struct dsa_notifier_ageing_time_info { /* DSA_NOTIFIER_BRIDGE_* */ struct dsa_notifier_bridge_info { struct net_device *br; + int tree_index; int sw_index; int port; }; @@ -137,6 +138,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans); +bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, struct switchdev_trans *trans); int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, diff --git a/net/dsa/master.c b/net/dsa/master.c index b5c535af63a3..a621367c6e8c 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -289,7 +289,8 @@ static void dsa_master_ndo_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; - dev->netdev_ops = cpu_dp->orig_ndo_ops; + if (cpu_dp->orig_ndo_ops) + dev->netdev_ops = cpu_dp->orig_ndo_ops; cpu_dp->orig_ndo_ops = NULL; } diff --git a/net/dsa/port.c b/net/dsa/port.c index a58fdd362574..e23ece229c7e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -13,6 +13,23 @@ #include "dsa_priv.h" +static int dsa_broadcast(unsigned long e, void *v) +{ + struct dsa_switch_tree *dst; + int err = 0; + + list_for_each_entry(dst, &dsa_tree_list, list) { + struct raw_notifier_head *nh = &dst->nh; + + err = raw_notifier_call_chain(nh, e, v); + err = notifier_to_errno(err); + if (err) + break; + } + + return err; +} + static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) { struct raw_notifier_head *nh = &dp->ds->dst->nh; @@ -120,6 +137,7 @@ void dsa_port_disable(struct dsa_port *dp) int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { + .tree_index = dp->ds->dst->index, .sw_index = dp->ds->index, .port = dp->index, .br = br, @@ -136,7 +154,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) */ dp->bridge_dev = br; - err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info); /* The bridging is rolled back on error */ if (err) { @@ -150,6 +168,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { + .tree_index = dp->ds->dst->index, .sw_index = dp->ds->index, .port = dp->index, .br = br, @@ -161,7 +180,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) */ dp->bridge_dev = NULL; - err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); @@ -238,6 +257,20 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, return 0; } +/* This enforces legacy behavior for switch drivers which assume they can't + * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0 + */ +bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + if (!dp->bridge_dev) + return false; + + return (!ds->configure_vlan_while_not_filtering && + !br_vlan_enabled(dp->bridge_dev)); +} + int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, struct switchdev_trans *trans) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ba8bf90dc0cc..886490fb203d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -314,7 +314,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, if (obj->orig_dev != dev) return -EOPNOTSUPP; - if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + if (dsa_port_skip_vlan_configuration(dp)) return 0; vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); @@ -381,7 +381,7 @@ static int dsa_slave_vlan_del(struct net_device *dev, if (obj->orig_dev != dev) return -EOPNOTSUPP; - if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + if (dsa_port_skip_vlan_configuration(dp)) return 0; /* Do not deprogram the CPU port as it may be shared with other user @@ -445,12 +445,11 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, #ifdef CONFIG_NET_POLL_CONTROLLER struct dsa_slave_priv *p = netdev_priv(dev); - if (p->netpoll) - netpoll_send_skb(p->netpoll, skb); + return netpoll_send_skb(p->netpoll, skb); #else BUG(); -#endif return NETDEV_TX_OK; +#endif } static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, @@ -856,20 +855,18 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, struct dsa_port *to_dp; int err; - act = &cls->rule->action.entries[0]; - if (!ds->ops->port_mirror_add) return -EOPNOTSUPP; - if (!act->dev) - return -EINVAL; - if (!flow_action_basic_hw_stats_check(&cls->rule->action, cls->common.extack)) return -EOPNOTSUPP; act = &cls->rule->action.entries[0]; + if (!act->dev) + return -EINVAL; + if (!dsa_slave_dev_check(act->dev)) return -EOPNOTSUPP; @@ -913,13 +910,13 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev, if (!ds->ops->port_policer_add) { NL_SET_ERR_MSG_MOD(extack, - "Policing offload not implemented\n"); + "Policing offload not implemented"); return -EOPNOTSUPP; } if (!ingress) { NL_SET_ERR_MSG_MOD(extack, - "Only supported on ingress qdisc\n"); + "Only supported on ingress qdisc"); return -EOPNOTSUPP; } @@ -930,7 +927,7 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev, list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) { if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) { NL_SET_ERR_MSG_MOD(extack, - "Only one port policer allowed\n"); + "Only one port policer allowed"); return -EEXIST; } } @@ -1243,7 +1240,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, * need to emulate the switchdev prepare + commit phase. */ if (dp->bridge_dev) { - if (!br_vlan_enabled(dp->bridge_dev)) + if (dsa_port_skip_vlan_configuration(dp)) return 0; /* br_vlan_get_info() returns -EINVAL or -ENOENT if the @@ -1277,7 +1274,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, * need to emulate the switchdev prepare + commit phase. */ if (dp->bridge_dev) { - if (!br_vlan_enabled(dp->bridge_dev)) + if (dsa_port_skip_vlan_configuration(dp)) return 0; /* br_vlan_get_info() returns -EINVAL or -ENOENT if the @@ -1671,6 +1668,15 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } +static struct lock_class_key dsa_slave_netdev_xmit_lock_key; +static void dsa_slave_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &dsa_slave_netdev_xmit_lock_key); +} + int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); @@ -1754,6 +1760,9 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); + netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, + NULL); + SET_NETDEV_DEV(slave_dev, port->ds->dev); slave_dev->dev.of_node = port->dn; slave_dev->vlan_features = master->vlan_features; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index f3c32ff552b3..86c8dc5c32a0 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -89,11 +89,16 @@ static int dsa_switch_mtu(struct dsa_switch *ds, static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - if (ds->index == info->sw_index && ds->ops->port_bridge_join) + struct dsa_switch_tree *dst = ds->dst; + + if (dst->index == info->tree_index && ds->index == info->sw_index && + ds->ops->port_bridge_join) return ds->ops->port_bridge_join(ds, info->port, info->br); - if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join) - return ds->ops->crosschip_bridge_join(ds, info->sw_index, + if ((dst->index != info->tree_index || ds->index != info->sw_index) && + ds->ops->crosschip_bridge_join) + return ds->ops->crosschip_bridge_join(ds, info->tree_index, + info->sw_index, info->port, info->br); return 0; @@ -103,13 +108,17 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { bool unset_vlan_filtering = br_vlan_enabled(info->br); + struct dsa_switch_tree *dst = ds->dst; int err, i; - if (ds->index == info->sw_index && ds->ops->port_bridge_leave) + if (dst->index == info->tree_index && ds->index == info->sw_index && + ds->ops->port_bridge_join) ds->ops->port_bridge_leave(ds, info->port, info->br); - if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave) - ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, + if ((dst->index != info->tree_index || ds->index != info->sw_index) && + ds->ops->crosschip_bridge_join) + ds->ops->crosschip_bridge_leave(ds, info->tree_index, + info->sw_index, info->port, info->br); /* If the bridge was vlan_filtering, the bridge core doesn't trigger an diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index b97ad93d1c1a..3052da668156 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -8,6 +8,7 @@ */ #include <linux/if_bridge.h> #include <linux/if_vlan.h> +#include <linux/dsa/8021q.h> #include "dsa_priv.h" @@ -16,7 +17,7 @@ * * | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * +-----------+-----+-----------------+-----------+-----------------------+ - * | DIR | RSV | SWITCH_ID | RSV | PORT | + * | DIR | SVL | SWITCH_ID | SUBVLAN | PORT | * +-----------+-----+-----------------+-----------+-----------------------+ * * DIR - VID[11:10]: @@ -26,17 +27,24 @@ * These values make the special VIDs of 0, 1 and 4095 to be left * unused by this coding scheme. * - * RSV - VID[9]: - * To be used for further expansion of SWITCH_ID or for other purposes. - * Must be transmitted as zero and ignored on receive. + * SVL/SUBVLAN - { VID[9], VID[5:4] }: + * Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN. + * * 0 (0b000): Field does not encode a sub-VLAN, either because + * received traffic is untagged, PVID-tagged or because a second + * VLAN tag is present after this tag and not inside of it. + * * 1 (0b001): Received traffic is tagged with a VID value private + * to the host. This field encodes the index in the host's lookup + * table through which the value of the ingress VLAN ID can be + * recovered. + * * 2 (0b010): Field encodes a sub-VLAN. + * ... + * * 7 (0b111): Field encodes a sub-VLAN. + * When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero + * (by the host) and ignored on receive (by the switch). * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and 7. * - * RSV - VID[5:4]: - * To be used for further expansion of PORT or for other purposes. - * Must be transmitted as zero and ignored on receive. - * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and 15. */ @@ -53,6 +61,18 @@ #define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \ DSA_8021Q_SWITCH_ID_MASK) +#define DSA_8021Q_SUBVLAN_HI_SHIFT 9 +#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9) +#define DSA_8021Q_SUBVLAN_LO_SHIFT 4 +#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(4, 3) +#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2) +#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0)) +#define DSA_8021Q_SUBVLAN(x) \ + (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \ + DSA_8021Q_SUBVLAN_LO_MASK) | \ + ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \ + DSA_8021Q_SUBVLAN_HI_MASK)) + #define DSA_8021Q_PORT_SHIFT 0 #define DSA_8021Q_PORT_MASK GENMASK(3, 0) #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ @@ -78,6 +98,13 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); +u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan) +{ + return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | + DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan); +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan); + /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) { @@ -92,6 +119,27 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); +/* Returns the decoded subvlan from the RX VID. */ +u16 dsa_8021q_rx_subvlan(u16 vid) +{ + u16 svl_hi, svl_lo; + + svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >> + DSA_8021Q_SUBVLAN_HI_SHIFT; + svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >> + DSA_8021Q_SUBVLAN_LO_SHIFT; + + return (svl_hi << 2) | svl_lo; +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan); + +bool vid_is_dsa_8021q(u16 vid) +{ + return ((vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX || + (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX); +} +EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); + static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) { struct bridge_vlan_info vinfo; @@ -288,6 +336,145 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) } EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); +static int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, bool enabled) +{ + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + + /* @rx_vid of local @ds port @port goes to @other_port of + * @other_ds + */ + return dsa_8021q_vid_apply(other_ds, other_port, rx_vid, + BRIDGE_VLAN_INFO_UNTAGGED, enabled); +} + +static int dsa_8021q_crosschip_link_add(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, + struct list_head *crosschip_links) +{ + struct dsa_8021q_crosschip_link *c; + + list_for_each_entry(c, crosschip_links, list) { + if (c->port == port && c->other_ds == other_ds && + c->other_port == other_port) { + refcount_inc(&c->refcount); + return 0; + } + } + + dev_dbg(ds->dev, "adding crosschip link from port %d to %s port %d\n", + port, dev_name(other_ds->dev), other_port); + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + + c->port = port; + c->other_ds = other_ds; + c->other_port = other_port; + refcount_set(&c->refcount, 1); + + list_add(&c->list, crosschip_links); + + return 0; +} + +static void dsa_8021q_crosschip_link_del(struct dsa_switch *ds, + struct dsa_8021q_crosschip_link *c, + struct list_head *crosschip_links, + bool *keep) +{ + *keep = !refcount_dec_and_test(&c->refcount); + + if (*keep) + return; + + dev_dbg(ds->dev, + "deleting crosschip link from port %d to %s port %d\n", + c->port, dev_name(c->other_ds->dev), c->other_port); + + list_del(&c->list); + kfree(c); +} + +/* Make traffic from local port @port be received by remote port @other_port. + * This means that our @rx_vid needs to be installed on @other_ds's upstream + * and user ports. The user ports should be egress-untagged so that they can + * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged + * or untagged: it doesn't matter, since it should never egress a frame having + * our @rx_vid. + */ +int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, + struct list_head *crosschip_links) +{ + /* @other_upstream is how @other_ds reaches us. If we are part + * of disjoint trees, then we are probably connected through + * our CPU ports. If we're part of the same tree though, we should + * probably use dsa_towards_port. + */ + int other_upstream = dsa_upstream_port(other_ds, other_port); + int rc; + + rc = dsa_8021q_crosschip_link_add(ds, port, other_ds, + other_port, crosschip_links); + if (rc) + return rc; + + rc = dsa_8021q_crosschip_link_apply(ds, port, other_ds, + other_port, true); + if (rc) + return rc; + + rc = dsa_8021q_crosschip_link_add(ds, port, other_ds, + other_upstream, + crosschip_links); + if (rc) + return rc; + + return dsa_8021q_crosschip_link_apply(ds, port, other_ds, + other_upstream, true); +} +EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join); + +int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, + struct list_head *crosschip_links) +{ + int other_upstream = dsa_upstream_port(other_ds, other_port); + struct dsa_8021q_crosschip_link *c, *n; + + list_for_each_entry_safe(c, n, crosschip_links, list) { + if (c->port == port && c->other_ds == other_ds && + (c->other_port == other_port || + c->other_port == other_upstream)) { + struct dsa_switch *other_ds = c->other_ds; + int other_port = c->other_port; + bool keep; + int rc; + + dsa_8021q_crosschip_link_del(ds, c, crosschip_links, + &keep); + if (keep) + continue; + + rc = dsa_8021q_crosschip_link_apply(ds, port, + other_ds, + other_port, + false); + if (rc) + return rc; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave); + struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci) { diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 59de1315100f..b0c98ee4e13b 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -228,7 +228,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, return skb; } -static struct dsa_device_ops ocelot_netdev_ops = { +static const struct dsa_device_ops ocelot_netdev_ops = { .name = "ocelot", .proto = DSA_TAG_PROTO_OCELOT, .xmit = ocelot_xmit, diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index d553bf36bd41..9b4a4d719291 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -69,12 +69,25 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) return true; } +static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb) +{ + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + + if (hdr->h_vlan_proto == htons(ETH_P_SJA1105)) + return true; + + if (hdr->h_vlan_proto != htons(ETH_P_8021Q)) + return false; + + return vid_is_dsa_8021q(ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK); +} + /* This is the first time the tagger sees the frame on RX. * Figure out if we can decode it. */ static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) { - if (!dsa_port_is_vlan_filtering(dev->dsa_ptr)) + if (sja1105_can_use_vlan_as_tags(skb)) return true; if (sja1105_is_link_local(skb)) return true; @@ -96,6 +109,11 @@ static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, return NULL; } +static u16 sja1105_xmit_tpid(struct sja1105_port *sp) +{ + return sp->xmit_tpid; +} + static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -111,15 +129,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, if (unlikely(sja1105_is_link_local(skb))) return sja1105_defer_xmit(dp->priv, skb); - /* If we are under a vlan_filtering bridge, IP termination on - * switch ports based on 802.1Q tags is simply too brittle to - * be passable. So just defer to the dsa_slave_notag_xmit - * implementation. - */ - if (dsa_port_is_vlan_filtering(dp)) - return skb; - - return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105, + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); } @@ -244,6 +254,20 @@ static struct sk_buff return skb; } +static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan) +{ + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_port *sp = dp->priv; + u16 vid = sp->subvlan_map[subvlan]; + u16 vlan_tci; + + if (vid == VLAN_N_VID) + return; + + vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid; + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); +} + static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev, struct packet_type *pt) @@ -253,12 +277,13 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct ethhdr *hdr; u16 tpid, vid, tci; bool is_link_local; + u16 subvlan = 0; bool is_tagged; bool is_meta; hdr = eth_hdr(skb); tpid = ntohs(hdr->h_proto); - is_tagged = (tpid == ETH_P_SJA1105); + is_tagged = (tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q); is_link_local = sja1105_is_link_local(skb); is_meta = sja1105_is_meta_frame(skb); @@ -276,6 +301,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, source_port = dsa_8021q_rx_source_port(vid); switch_id = dsa_8021q_rx_switch_id(vid); skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + subvlan = dsa_8021q_rx_subvlan(vid); } else if (is_link_local) { /* Management traffic path. Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of @@ -300,11 +326,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, return NULL; } + if (subvlan) + sja1105_decode_subvlan(skb, subvlan); + return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local, is_meta); } -static struct dsa_device_ops sja1105_netdev_ops = { +static const struct dsa_device_ops sja1105_netdev_ops = { .name = "sja1105", .proto = DSA_TAG_PROTO_SJA1105, .xmit = sja1105_xmit, diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 6c360c9c9370..0c2b94f20499 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ - channels.o coalesce.o pause.o eee.o tsinfo.o + channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c new file mode 100644 index 000000000000..5ba06eabe8c2 --- /dev/null +++ b/net/ethtool/cabletest.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/phy.h> +#include <linux/ethtool_netlink.h> +#include "netlink.h" +#include "common.h" + +/* CABLE_TEST_ACT */ + +static const struct nla_policy +cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = { + [ETHTOOL_A_CABLE_TEST_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CABLE_TEST_HEADER] = { .type = NLA_NESTED }, +}; + +static int ethnl_cable_test_started(struct phy_device *phydev) +{ + struct sk_buff *skb; + int err = -ENOMEM; + void *ehdr; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + goto out; + + ehdr = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_CABLE_TEST_NTF); + if (!ehdr) { + err = -EMSGSIZE; + goto out; + } + + err = ethnl_fill_reply_header(skb, phydev->attached_dev, + ETHTOOL_A_CABLE_TEST_NTF_HEADER); + if (err) + goto out; + + err = nla_put_u8(skb, ETHTOOL_A_CABLE_TEST_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED); + if (err) + goto out; + + genlmsg_end(skb, ehdr); + + return ethnl_multicast(skb, phydev->attached_dev); + +out: + nlmsg_free(skb); + phydev_err(phydev, "%s: Error %pe\n", __func__, ERR_PTR(err)); + + return err; +} + +int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_CABLE_TEST_MAX + 1]; + struct ethnl_req_info req_info = {}; + struct net_device *dev; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_CABLE_TEST_MAX, + cable_test_act_policy, info->extack); + if (ret < 0) + return ret; + + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_CABLE_TEST_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + + dev = req_info.dev; + if (!dev->phydev) { + ret = -EOPNOTSUPP; + goto out_dev_put; + } + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + + ret = phy_start_cable_test(dev->phydev, info->extack); + + ethnl_ops_complete(dev); + + if (!ret) + ethnl_cable_test_started(dev->phydev); + +out_rtnl: + rtnl_unlock(); +out_dev_put: + dev_put(dev); + return ret; +} + +int ethnl_cable_test_alloc(struct phy_device *phydev) +{ + int err = -ENOMEM; + + phydev->skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!phydev->skb) + goto out; + + phydev->ehdr = ethnl_bcastmsg_put(phydev->skb, + ETHTOOL_MSG_CABLE_TEST_NTF); + if (!phydev->ehdr) { + err = -EMSGSIZE; + goto out; + } + + err = ethnl_fill_reply_header(phydev->skb, phydev->attached_dev, + ETHTOOL_A_CABLE_TEST_NTF_HEADER); + if (err) + goto out; + + err = nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_TEST_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED); + if (err) + goto out; + + phydev->nest = nla_nest_start(phydev->skb, + ETHTOOL_A_CABLE_TEST_NTF_NEST); + if (!phydev->nest) { + err = -EMSGSIZE; + goto out; + } + + return 0; + +out: + nlmsg_free(phydev->skb); + phydev->skb = NULL; + return err; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_alloc); + +void ethnl_cable_test_free(struct phy_device *phydev) +{ + nlmsg_free(phydev->skb); + phydev->skb = NULL; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_free); + +void ethnl_cable_test_finished(struct phy_device *phydev) +{ + nla_nest_end(phydev->skb, phydev->nest); + + genlmsg_end(phydev->skb, phydev->ehdr); + + ethnl_multicast(phydev->skb, phydev->attached_dev); +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_finished); + +int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result) +{ + struct nlattr *nest; + int ret = -EMSGSIZE; + + nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_NEST_RESULT); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_PAIR, pair)) + goto err; + if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_CODE, result)) + goto err; + + nla_nest_end(phydev->skb, nest); + return 0; + +err: + nla_nest_cancel(phydev->skb, nest); + return ret; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_result); + +int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm) +{ + struct nlattr *nest; + int ret = -EMSGSIZE; + + nest = nla_nest_start(phydev->skb, + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, pair)) + goto err; + if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_CM, cm)) + goto err; + + nla_nest_end(phydev->skb, nest); + return 0; + +err: + nla_nest_cancel(phydev->skb, nest); + return ret; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 226d5ecdd567..52102ab1709b 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -552,6 +552,8 @@ static int ethtool_get_link_ksettings(struct net_device *dev, link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; link_ksettings.base.link_mode_masks_nwords = __ETHTOOL_LINK_MODE_MASK_NU32; + link_ksettings.base.master_slave_cfg = MASTER_SLAVE_CFG_UNSUPPORTED; + link_ksettings.base.master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED; return store_link_ksettings_for_user(useraddr, &link_ksettings); } @@ -589,6 +591,10 @@ static int ethtool_set_link_ksettings(struct net_device *dev, != link_ksettings.base.link_mode_masks_nwords) return -EINVAL; + if (link_ksettings.base.master_slave_cfg || + link_ksettings.base.master_slave_state) + return -EINVAL; + err = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); if (err >= 0) { ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL); diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 452608c6d856..fd4f3e58c6f6 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -27,6 +27,8 @@ linkmodes_get_policy[ETHTOOL_A_LINKMODES_MAX + 1] = { [ETHTOOL_A_LINKMODES_PEER] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE] = { .type = NLA_REJECT }, }; static int linkmodes_prepare_data(const struct ethnl_req_info *req_base, @@ -63,6 +65,7 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base, { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; + const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; int len, ret; @@ -86,6 +89,12 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base, len += ret; } + if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED) + len += nla_total_size(sizeof(u8)); + + if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED) + len += nla_total_size(sizeof(u8)); + return len; } @@ -122,6 +131,16 @@ static int linkmodes_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex)) return -EMSGSIZE; + if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED && + nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, + lsettings->master_slave_cfg)) + return -EMSGSIZE; + + if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED && + nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, + lsettings->master_slave_state)) + return -EMSGSIZE; + return 0; } @@ -249,6 +268,8 @@ linkmodes_set_policy[ETHTOOL_A_LINKMODES_MAX + 1] = { [ETHTOOL_A_LINKMODES_PEER] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 }, [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 }, + [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .type = NLA_U8 }, + [ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE] = { .type = NLA_REJECT }, }; /* Set advertised link modes to all supported modes matching requested speed @@ -287,14 +308,45 @@ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static bool ethnl_validate_master_slave_cfg(u8 cfg) +{ + switch (cfg) { + case MASTER_SLAVE_CFG_MASTER_PREFERRED: + case MASTER_SLAVE_CFG_SLAVE_PREFERRED: + case MASTER_SLAVE_CFG_MASTER_FORCE: + case MASTER_SLAVE_CFG_SLAVE_FORCE: + return true; + } + + return false; +} + static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, struct ethtool_link_ksettings *ksettings, bool *mod) { struct ethtool_link_settings *lsettings = &ksettings->base; bool req_speed, req_duplex; + const struct nlattr *master_slave_cfg; int ret; + master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; + if (master_slave_cfg) { + u8 cfg = nla_get_u8(master_slave_cfg); + + if (lsettings->master_slave_cfg == MASTER_SLAVE_CFG_UNSUPPORTED) { + NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, + "master/slave configuration not supported by device"); + return -EOPNOTSUPP; + } + + if (!ethnl_validate_master_slave_cfg(cfg)) { + NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, + "master/slave value is invalid"); + return -EOPNOTSUPP; + } + } + *mod = false; req_speed = tb[ETHTOOL_A_LINKMODES_SPEED]; req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX]; @@ -311,6 +363,7 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, mod); ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX], mod); + ethnl_update_u8(&lsettings->master_slave_cfg, master_slave_cfg, mod); if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg && (req_speed || req_duplex) && diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 0c772318c023..87bc02da74bc 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -181,13 +181,13 @@ err: return NULL; } -static void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) +void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) { return genlmsg_put(skb, 0, ++ethnl_bcast_seq, ðtool_genl_family, 0, cmd); } -static int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) +int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) { return genlmsg_multicast_netns(ðtool_genl_family, dev_net(dev), skb, 0, ETHNL_MCGRP_MONITOR, GFP_KERNEL); @@ -839,6 +839,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_CABLE_TEST_ACT, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_act_cable_test, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 81b8fa020bcb..b0eb5d920099 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -19,6 +19,8 @@ int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, u16 hdr_attrtype, struct genl_info *info, void **ehdrp); +void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); +int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); /** * ethnl_strz_size() - calculate attribute length for fixed size string @@ -357,5 +359,6 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info); int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info); int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info); int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info); +int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ddd9605bad04..ed13760463de 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -321,7 +321,7 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, if (ethhdr->h_proto == htons(ETH_P_8021Q)) { frame->is_vlan = true; /* FIXME: */ - WARN_ONCE(1, "HSR: VLAN not yet supported"); + netdev_warn_once(skb->dev, "VLAN not yet supported"); } if (ethhdr->h_proto == htons(ETH_P_PRP) || ethhdr->h_proto == htons(ETH_P_HSR)) { diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index f4b9f7a3ce51..25b6ffba26cd 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -18,7 +18,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct hsr_port *port; - u16 protocol; + __be16 protocol; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: skb invalid", __func__); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index c0b107cdd715..3297e7fa9945 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -58,6 +58,13 @@ static const struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; +static int lowpan_dev_init(struct net_device *ldev) +{ + netdev_lockdep_set_classes(ldev); + + return 0; +} + static int lowpan_open(struct net_device *dev) { if (!open_count) @@ -89,6 +96,7 @@ static int lowpan_get_iflink(const struct net_device *dev) } static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c index ee179380a766..b34d050c9687 100644 --- a/net/ieee802154/6lowpan/rx.c +++ b/net/ieee802154/6lowpan/rx.c @@ -240,7 +240,7 @@ static inline bool lowpan_is_reserved(u8 dispatch) return ((dispatch >= 0x44 && dispatch <= 0x4F) || (dispatch >= 0x51 && dispatch <= 0x5F) || (dispatch >= 0xc8 && dispatch <= 0xdf) || - (dispatch >= 0xe8 && dispatch <= 0xff)); + dispatch >= 0xe8); } /* lowpan_rx_h_check checks on generic 6LoWPAN requirements diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 029b24eeafba..0ce9b91ff55c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -248,6 +248,15 @@ static void gre_err(struct sk_buff *skb, u32 info) ipgre_err(skb, info, &tpi); } +static bool is_erspan_type1(int gre_hdr_len) +{ + /* Both ERSPAN type I (version 0) and type II (version 1) use + * protocol 0x88BE, but the type I has only 4-byte GRE header, + * while type II has 8-byte. + */ + return gre_hdr_len == 4; +} + static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int gre_hdr_len) { @@ -262,17 +271,26 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int len; itn = net_generic(net, erspan_net_id); - iph = ip_hdr(skb); - ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); - ver = ershdr->ver; - - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, - tpi->flags | TUNNEL_KEY, - iph->saddr, iph->daddr, tpi->key); + if (is_erspan_type1(gre_hdr_len)) { + ver = 0; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, + tpi->flags | TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + } else { + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); + ver = ershdr->ver; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, + tpi->flags | TUNNEL_KEY, + iph->saddr, iph->daddr, tpi->key); + } if (tunnel) { - len = gre_hdr_len + erspan_hdr_len(ver); + if (is_erspan_type1(gre_hdr_len)) + len = gre_hdr_len; + else + len = gre_hdr_len + erspan_hdr_len(ver); + if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; @@ -665,7 +683,10 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - if (tunnel->erspan_ver == 1) { + if (tunnel->erspan_ver == 0) { + proto = htons(ETH_P_ERSPAN); + tunnel->parms.o_flags &= ~TUNNEL_SEQ; + } else if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); @@ -1066,7 +1087,11 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], if (ret) return ret; - /* ERSPAN should only have GRE sequence and key flag */ + if (data[IFLA_GRE_ERSPAN_VER] && + nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0) + return 0; + + /* ERSPAN type II/III should only have GRE sequence and key flag */ if (data[IFLA_GRE_OFLAGS]) flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); if (data[IFLA_GRE_IFLAGS]) @@ -1174,7 +1199,7 @@ static int erspan_netlink_parms(struct net_device *dev, if (data[IFLA_GRE_ERSPAN_VER]) { t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); - if (t->erspan_ver != 1 && t->erspan_ver != 2) + if (t->erspan_ver > 2) return -EINVAL; } @@ -1259,7 +1284,11 @@ static int erspan_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - tunnel->tun_hlen = 8; + if (tunnel->erspan_ver == 0) + tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */ + else + tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */ + tunnel->parms.iph.protocol = IPPROTO_GRE; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + erspan_hdr_len(tunnel->erspan_ver); @@ -1456,8 +1485,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if (t->erspan_ver == 1 || t->erspan_ver == 2) { - if (!t->collect_md) + if (t->erspan_ver <= 2) { + if (t->erspan_ver != 0 && !t->collect_md) o_flags |= TUNNEL_KEY; if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) @@ -1466,7 +1495,7 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) if (t->erspan_ver == 1) { if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) goto nla_put_failure; - } else { + } else if (t->erspan_ver == 2) { if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index aa3fd61818c4..8206047d70b6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1492,7 +1492,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; - msg.msg_control = (__force void *) optval; + msg.msg_control_is_user = true; + msg.msg_control_user = optval; msg.msg_controllen = len; msg.msg_flags = flags; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d68128a672ab..7529c2816f2f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2183,8 +2183,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) } /* Detect loss in event "A" above by marking head of queue up as lost. - * For non-SACK(Reno) senders, the first "packets" number of segments - * are considered lost. For RFC3517 SACK, a segment is considered lost if it + * For RFC3517 SACK, a segment is considered lost if it * has at least tp->reordering SACKed seqments above it; "packets" refers to * the maximum SACKed segments to pass before reaching this limit. */ @@ -2192,10 +2191,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt, oldcnt, lost; - unsigned int mss; + int cnt; /* Use SACK to deduce losses of new sequences sent during recovery */ - const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; + const u32 loss_high = tp->snd_nxt; WARN_ON(packets > tp->packets_out); skb = tp->lost_skb_hint; @@ -2218,26 +2216,11 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) break; - oldcnt = cnt; - if (tcp_is_reno(tp) || - (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) cnt += tcp_skb_pcount(skb); - if (cnt > packets) { - if (tcp_is_sack(tp) || - (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || - (oldcnt >= packets)) - break; - - mss = tcp_skb_mss(skb); - /* If needed, chop off the prefix to mark as lost. */ - lost = (packets - oldcnt) * mss; - if (lost < skb->len && - tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, - lost, mss, GFP_ATOMIC) < 0) - break; - cnt = packets; - } + if (cnt > packets) + break; tcp_skb_mark_lost(tp, skb); @@ -2849,8 +2832,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, if (tcp_try_undo_partial(sk, prior_snd_una)) return; /* Partial ACK arrived. Force fast retransmit. */ - do_lost = tcp_is_reno(tp) || - tcp_force_fast_retransmit(sk); + do_lost = tcp_force_fast_retransmit(sk); } if (tcp_try_undo_dsack(sk)) { tcp_try_keep_open(sk); @@ -3014,7 +2996,7 @@ void tcp_rearm_rto(struct sock *sk) rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, - TCP_RTO_MAX, tcp_rtx_queue_head(sk)); + TCP_RTO_MAX); } } @@ -3291,7 +3273,7 @@ static void tcp_ack_probe(struct sock *sk) unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - when, TCP_RTO_MAX, NULL); + when, TCP_RTO_MAX); } } @@ -3926,10 +3908,6 @@ void tcp_parse_options(const struct net *net, */ break; #endif - case TCPOPT_MPTCP: - mptcp_parse_option(skb, ptr, opsize, opt_rx); - break; - case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( opsize - TCPOLEN_FASTOPEN_BASE, @@ -6019,9 +5997,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); - if (sk_is_mptcp(sk)) - mptcp_rcv_synsent(sk); - /* Remember, tcp_poll() does not lock socket! * Change state from SYN-SENT only after copied_seq * is initialized. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c414aeb1efa9..a50e1990a845 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2593,8 +2593,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rto_delta_us > 0) timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); - tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, - TCP_RTO_MAX, NULL); + tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); return true; } @@ -3113,6 +3112,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb, *rtx_head, *hole = NULL; struct tcp_sock *tp = tcp_sk(sk); + bool rearm_timer = false; u32 max_segs; int mib_idx; @@ -3135,7 +3135,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) - return; + break; sacked = TCP_SKB_CB(skb)->sacked; /* In case tcp_shift_skb_data() have aggregated large skbs, * we need to make sure not sending too bigs TSO packets @@ -3160,10 +3160,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) continue; if (tcp_small_queue_check(sk, skb, 1)) - return; + break; if (tcp_retransmit_skb(sk, skb, segs)) - return; + break; NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); @@ -3172,11 +3172,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == rtx_head && icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) - tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX, - skb); + rearm_timer = true; + } + if (rearm_timer) + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); } /* We allow to exceed memory limits for FIN packets to expedite @@ -3907,7 +3909,7 @@ void tcp_send_probe0(struct sock *sk) */ timeout = TCP_RESOURCE_PROBE_INTERVAL; } - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); } int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 26e666fe9a0e..fd885f06c4ed 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -135,8 +135,7 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -static void ipv6_regen_rndid(struct inet6_dev *idev); -static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); +static void ipv6_gen_rnd_iid(struct in6_addr *addr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(const struct inet6_dev *idev); @@ -432,8 +431,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; - } else - ipv6_regen_rndid(ndev); + } ndev->token = in6addr_any; @@ -1306,29 +1304,21 @@ out: in6_ifa_put(ifp); } -static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, - struct inet6_ifaddr *ift, - bool block) +static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) { struct inet6_dev *idev = ifp->idev; - struct in6_addr addr, *tmpaddr; unsigned long tmp_tstamp, age; unsigned long regen_advance; - struct ifa6_config cfg; - int ret = 0; unsigned long now = jiffies; - long max_desync_factor; s32 cnf_temp_preferred_lft; + struct inet6_ifaddr *ift; + struct ifa6_config cfg; + long max_desync_factor; + struct in6_addr addr; + int ret = 0; write_lock_bh(&idev->lock); - if (ift) { - spin_lock_bh(&ift->lock); - memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); - spin_unlock_bh(&ift->lock); - tmpaddr = &addr; - } else { - tmpaddr = NULL; - } + retry: in6_dev_hold(idev); if (idev->cnf.use_tempaddr <= 0) { @@ -1351,8 +1341,8 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - ipv6_try_regen_rndid(idev, tmpaddr); - memcpy(&addr.s6_addr[8], idev->rndid, 8); + ipv6_gen_rnd_iid(&addr); + age = (now - ifp->tstamp) / HZ; regen_advance = idev->cnf.regen_max_retry * @@ -1417,7 +1407,6 @@ retry: in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); - tmpaddr = &addr; write_lock_bh(&idev->lock); goto retry; } @@ -2032,7 +2021,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (ifpub) { in6_ifa_hold(ifpub); spin_unlock_bh(&ifp->lock); - ipv6_create_tempaddr(ifpub, ifp, true); + ipv6_create_tempaddr(ifpub, true); in6_ifa_put(ifpub); } else { spin_unlock_bh(&ifp->lock); @@ -2329,40 +2318,38 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) return err; } -/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static void ipv6_regen_rndid(struct inet6_dev *idev) +/* Generation of a randomized Interface Identifier + * draft-ietf-6man-rfc4941bis, Section 3.3.1 + */ + +static void ipv6_gen_rnd_iid(struct in6_addr *addr) { regen: - get_random_bytes(idev->rndid, sizeof(idev->rndid)); - idev->rndid[0] &= ~0x02; + get_random_bytes(&addr->s6_addr[8], 8); - /* - * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>: - * check if generated address is not inappropriate + /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1: + * check if generated address is not inappropriate: * - * - Reserved subnet anycast (RFC 2526) - * 11111101 11....11 1xxxxxxx - * - ISATAP (RFC4214) 6.1 - * 00-00-5E-FE-xx-xx-xx-xx - * - value 0 - * - XXX: already assigned to an address on the device + * - Reserved IPv6 Interface Identifers + * - XXX: already assigned to an address on the device */ - if (idev->rndid[0] == 0xfd && - (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff && - (idev->rndid[7]&0x80)) + + /* Subnet-router anycast: 0000:0000:0000:0000 */ + if (!(addr->s6_addr32[2] | addr->s6_addr32[3])) goto regen; - if ((idev->rndid[0]|idev->rndid[1]) == 0) { - if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe) - goto regen; - if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) - goto regen; - } -} -static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) -{ - if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - ipv6_regen_rndid(idev); + /* IANA Ethernet block: 0200:5EFF:FE00:0000-0200:5EFF:FE00:5212 + * Proxy Mobile IPv6: 0200:5EFF:FE00:5213 + * IANA Ethernet block: 0200:5EFF:FE00:5214-0200:5EFF:FEFF:FFFF + */ + if (ntohl(addr->s6_addr32[2]) == 0x02005eff && + (ntohl(addr->s6_addr32[3]) & 0Xff000000) == 0xfe000000) + goto regen; + + /* Reserved subnet anycast addresses */ + if (ntohl(addr->s6_addr32[2]) == 0xfdffffff && + ntohl(addr->s6_addr32[3]) >= 0Xffffff80) + goto regen; } /* @@ -2544,7 +2531,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, * no temporary address currently exists. */ read_unlock_bh(&idev->lock); - ipv6_create_tempaddr(ifp, NULL, false); + ipv6_create_tempaddr(ifp, false); } else { read_unlock_bh(&idev->lock); } @@ -4531,7 +4518,7 @@ restart: ifpub->regen_count = 0; spin_unlock(&ifpub->lock); rcu_read_unlock_bh(); - ipv6_create_tempaddr(ifpub, ifp, true); + ipv6_create_tempaddr(ifpub, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); rcu_read_lock_bh(); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 18d05403d3b5..a0e50cc57e54 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1075,6 +1075,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; msg.msg_flags = flags; + msg.msg_control_is_user = true; lock_sock(sk); skb = np->pktoptions; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 22e56465f14d..fcf0d5c87d09 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1377,7 +1377,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) rcu_read_lock(); dev = ip6_rt_get_dev_rcu(res); - pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT); rcu_read_unlock(); if (!pcpu_rt) { fib6_info_release(f6i); @@ -1385,9 +1385,18 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) } ip6_rt_copy_init(pcpu_rt, res); pcpu_rt->rt6i_flags |= RTF_PCPU; + + if (f6i->nh) + pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev)); + return pcpu_rt; } +static bool rt6_is_valid(const struct rt6_info *rt6) +{ + return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev)); +} + /* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { @@ -1395,6 +1404,19 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); + if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) { + struct rt6_info *prev, **p; + + p = this_cpu_ptr(res->nh->rt6i_pcpu); + prev = xchg(p, NULL); + if (prev) { + dst_dev_put(&prev->dst); + dst_release(&prev->dst); + } + + pcpu_rt = NULL; + } + return pcpu_rt; } @@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = container_of(dst, struct rt6_info, dst); + if (rt->sernum) + return rt6_is_valid(rt) ? dst : NULL; + rcu_read_lock(); /* All IPV6 dsts are created with ->obsolete set to the value @@ -3170,6 +3195,9 @@ static int ip6_dst_gc(struct dst_ops *ops) int entries; entries = dst_entries_get_fast(ops); + if (entries > rt_max_size) + entries = dst_entries_get_slow(ops); + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 4c7e0a27fa9c..37b434293bda 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -27,8 +27,9 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) { - int trailing; unsigned int tlv_offset; + int max_last_entry; + int trailing; if (srh->type != IPV6_SRCRT_TYPE_4) return false; @@ -36,7 +37,12 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) if (((srh->hdrlen + 1) << 3) != len) return false; - if (srh->segments_left > srh->first_segment) + max_last_entry = (srh->hdrlen / 2) - 1; + + if (srh->first_segment > max_last_entry) + return false; + + if (srh->segments_left > srh->first_segment + 1) return false; tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d3b520b9b2c9..fd5ac2788e45 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -56,6 +56,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) { eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); + netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4a7c467b99db..45497af23906 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -16,10 +16,10 @@ static bool mptcp_cap_flag_sha256(u8 flags) return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; } -void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, - int opsize, struct tcp_options_received *opt_rx) +static void mptcp_parse_option(const struct sk_buff *skb, + const unsigned char *ptr, int opsize, + struct mptcp_options_received *mp_opt) { - struct mptcp_options_received *mp_opt = &opt_rx->mptcp; u8 subtype = *ptr >> 4; int expected_opsize; u8 version; @@ -283,12 +283,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, } void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx) + struct mptcp_options_received *mp_opt) { - const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff * 4) - sizeof(struct tcphdr); + const unsigned char *ptr; + int length; + + /* initialize option status */ + mp_opt->mp_capable = 0; + mp_opt->mp_join = 0; + mp_opt->add_addr = 0; + mp_opt->rm_addr = 0; + mp_opt->dss = 0; + length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); while (length > 0) { @@ -308,7 +316,7 @@ void mptcp_get_options(const struct sk_buff *skb, if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_MPTCP) - mptcp_parse_option(skb, ptr, opsize, opt_rx); + mptcp_parse_option(skb, ptr, opsize, mp_opt); ptr += opsize - 2; length -= opsize; } @@ -344,28 +352,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, return false; } -void mptcp_rcv_synsent(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct tcp_sock *tp = tcp_sk(sk); - - if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { - subflow->mp_capable = 1; - subflow->can_ack = 1; - subflow->remote_key = tp->rx_opt.mptcp.sndr_key; - pr_debug("subflow=%p, remote_key=%llu", subflow, - subflow->remote_key); - } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) { - subflow->mp_join = 1; - subflow->thmac = tp->rx_opt.mptcp.thmac; - subflow->remote_nonce = tp->rx_opt.mptcp.nonce; - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, - subflow->thmac, subflow->remote_nonce); - } else if (subflow->request_mptcp) { - tcp_sk(sk)->is_mptcp = 0; - } -} - /* MP_JOIN client subflow must wait for 4th ack before sending any data: * TCP can't schedule delack timer before the subflow is fully established. * MPTCP uses the delack timer to do 3rd ack retransmissions @@ -709,7 +695,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) return subflow->mp_capable; - if (mp_opt->use_ack) { + if (mp_opt->dss && mp_opt->use_ack) { /* subflows are fully established as soon as we get any * additional ack. */ @@ -717,8 +703,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, goto fully_established; } - WARN_ON_ONCE(subflow->can_ack); - /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP */ @@ -728,6 +712,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, return false; } + if (unlikely(!READ_ONCE(msk->pm.server_side))) + pr_warn_once("bogus mpc option on established client sk"); subflow->fully_established = 1; subflow->remote_key = mp_opt->sndr_key; subflow->can_ack = 1; @@ -819,41 +805,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct mptcp_options_received *mp_opt; + struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; - mp_opt = &opt_rx->mptcp; - if (!check_fully_established(msk, sk, subflow, skb, mp_opt)) + mptcp_get_options(skb, &mp_opt); + if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; - if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) { + if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { struct mptcp_addr_info addr; - addr.port = htons(mp_opt->port); - addr.id = mp_opt->addr_id; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { + addr.port = htons(mp_opt.port); + addr.id = mp_opt.addr_id; + if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) { addr.family = AF_INET; - addr.addr = mp_opt->addr; + addr.addr = mp_opt.addr; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) { + else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) { addr.family = AF_INET6; - addr.addr6 = mp_opt->addr6; + addr.addr6 = mp_opt.addr6; } #endif - if (!mp_opt->echo) + if (!mp_opt.echo) mptcp_pm_add_addr_received(msk, &addr); - mp_opt->add_addr = 0; + mp_opt.add_addr = 0; } - if (!mp_opt->dss) + if (!mp_opt.dss) return; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ - if (mp_opt->use_ack) - update_una(msk, mp_opt); + if (mp_opt.use_ack) + update_una(msk, &mp_opt); mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) @@ -861,8 +847,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, memset(mpext, 0, sizeof(*mpext)); - if (mp_opt->use_map) { - if (mp_opt->mpc_map) { + if (mp_opt.use_map) { + if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data */ @@ -872,13 +858,14 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, mpext->subflow_seq = 1; mpext->dsn64 = 1; mpext->mpc_map = 1; + mpext->data_fin = 0; } else { - mpext->data_seq = mp_opt->data_seq; - mpext->subflow_seq = mp_opt->subflow_seq; - mpext->dsn64 = mp_opt->dsn64; - mpext->data_fin = mp_opt->data_fin; + mpext->data_seq = mp_opt.data_seq; + mpext->subflow_seq = mp_opt.subflow_seq; + mpext->dsn64 = mp_opt.dsn64; + mpext->data_fin = mp_opt.data_fin; } - mpext->data_len = mp_opt->data_len; + mpext->data_len = mp_opt.data_len; mpext->use_map = 1; } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b22a63ba2348..e1f23016ed3f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1316,11 +1316,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - lock_sock(sk); - __mptcp_clear_xmit(sk); - release_sock(sk); - mptcp_cancel_work(sk); - return tcp_disconnect(sk, flags); + /* Should never be called. + * inet_stream_connect() calls ->disconnect, but that + * refers to the subflow socket, not the mptcp one. + */ + WARN_ON_ONCE(1); + return 0; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1333,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) #endif struct sock *mptcp_sk_clone(const struct sock *sk, - const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mp_opt, struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -1372,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); - if (opt_rx->mptcp.mp_capable) { + if (mp_opt->mp_capable) { msk->can_ack = true; - msk->remote_key = opt_rx->mptcp.sndr_key; + msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; msk->ack_seq = ack_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a2b3048037d0..e4ca6320ce76 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -91,6 +91,45 @@ #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 +struct mptcp_options_received { + u64 sndr_key; + u64 rcvr_key; + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + u16 mp_capable : 1, + mp_join : 1, + dss : 1, + add_addr : 1, + rm_addr : 1, + family : 4, + echo : 1, + backup : 1; + u32 token; + u32 nonce; + u64 thmac; + u8 hmac[20]; + u8 join_id; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + __unused:2; + u8 addr_id; + u8 rm_id; + union { + struct in_addr addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + struct in6_addr addr6; +#endif + }; + u64 ahmac; + u16 port; +}; + static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | @@ -331,10 +370,10 @@ int mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone(const struct sock *sk, - const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mp_opt, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx); + struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 87c094702d63..009d5c478062 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -124,12 +124,11 @@ static void subflow_init_req(struct request_sock *req, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct tcp_options_received rx_opt; + struct mptcp_options_received mp_opt; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp)); - mptcp_get_options(skb, &rx_opt); + mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; @@ -142,16 +141,16 @@ static void subflow_init_req(struct request_sock *req, return; #endif - if (rx_opt.mptcp.mp_capable) { + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); - if (rx_opt.mptcp.mp_join) + if (mp_opt.mp_join) return; - } else if (rx_opt.mptcp.mp_join) { + } else if (mp_opt.mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } - if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { + if (mp_opt.mp_capable && listener->request_mptcp) { int err; err = mptcp_token_new_request(req); @@ -159,13 +158,13 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; - } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) { + } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; - subflow_req->backup = rx_opt.mptcp.backup; - subflow_req->remote_id = rx_opt.mptcp.join_id; - subflow_req->token = rx_opt.mptcp.token; - subflow_req->remote_nonce = rx_opt.mptcp.nonce; + subflow_req->backup = mp_opt.backup; + subflow_req->remote_id = mp_opt.join_id; + subflow_req->token = mp_opt.token; + subflow_req->remote_nonce = mp_opt.nonce; pr_debug("token=%u, remote_nonce=%u", subflow_req->token, subflow_req->remote_nonce); if (!subflow_token_join_request(req, skb)) { @@ -221,23 +220,47 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; + struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (inet_sk_state_load(parent) != TCP_ESTABLISHED) { + if (inet_sk_state_load(parent) == TCP_SYN_SENT) { inet_sk_state_store(parent, TCP_ESTABLISHED); parent->sk_state_change(parent); } - if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp) + /* be sure no special action on any packet other than syn-ack */ + if (subflow->conn_finished) + return; + + subflow->conn_finished = 1; + + mptcp_get_options(skb, &mp_opt); + if (subflow->request_mptcp && mp_opt.mp_capable) { + subflow->mp_capable = 1; + subflow->can_ack = 1; + subflow->remote_key = mp_opt.sndr_key; + pr_debug("subflow=%p, remote_key=%llu", subflow, + subflow->remote_key); + } else if (subflow->request_join && mp_opt.mp_join) { + subflow->mp_join = 1; + subflow->thmac = mp_opt.thmac; + subflow->remote_nonce = mp_opt.nonce; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, + subflow->thmac, subflow->remote_nonce); + } else if (subflow->request_mptcp) { + tp->is_mptcp = 0; + } + + if (!tp->is_mptcp) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - subflow->conn_finished = 1; if (skb) { pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); @@ -264,7 +287,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!mptcp_finish_join(sk)) goto do_reset; - subflow->conn_finished = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); } else { do_reset: @@ -322,7 +344,7 @@ drop: /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, - const struct tcp_options_received *rx_opt) + const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; u8 hmac[MPTCPOPT_HMAC_LEN]; @@ -339,7 +361,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac))) ret = false; sock_put((struct sock *)msk); @@ -395,7 +417,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; - struct tcp_options_received opt_rx; + struct mptcp_options_received mp_opt; bool fallback_is_fatal = false; struct sock *new_msk = NULL; bool fallback = false; @@ -403,7 +425,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); - opt_rx.mptcp.mp_capable = 0; + /* we need later a valid 'mp_capable' value even when options are not + * parsed + */ + mp_opt.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -418,22 +443,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_capable) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_capable) { fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req); + new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; - opt_rx.mptcp.mp_join = 0; - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_join || - !subflow_hmac_valid(req, &opt_rx)) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_join || + !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); return NULL; } @@ -473,9 +497,9 @@ create_child: /* with OoO packets we can reach here without ingress * mpc option */ - ctx->remote_key = opt_rx.mptcp.sndr_key; - ctx->fully_established = opt_rx.mptcp.mp_capable; - ctx->can_ack = opt_rx.mptcp.mp_capable; + ctx->remote_key = mp_opt.sndr_key; + ctx->fully_established = mp_opt.mp_capable; + ctx->can_ack = mp_opt.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -499,7 +523,7 @@ out: /* check for expected invariant - should never trigger, just help * catching eariler subtle bugs */ - WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp && + WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp && (!mptcp_subflow_ctx(child) || !mptcp_subflow_ctx(child)->conn)); return child; diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 3d816a1e5442..59151dc07fdc 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -68,15 +68,13 @@ static bool udp_manip_pkt(struct sk_buff *skb, enum nf_nat_manip_type maniptype) { struct udphdr *hdr; - bool do_csum; if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) return false; hdr = (struct udphdr *)(skb->data + hdroff); - do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; + __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check); - __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum); return true; } diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 9f5dea0064ea..916a3c7f9eaf 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -165,12 +165,12 @@ static bool nf_osf_match_one(const struct sk_buff *skb, static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx, const struct sk_buff *skb, const struct iphdr *ip, - unsigned char *opts) + unsigned char *opts, + struct tcphdr *_tcph) { const struct tcphdr *tcp; - struct tcphdr _tcph; - tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph); if (!tcp) return NULL; @@ -205,10 +205,11 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, int fmatch = FMATCH_WRONG; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; + struct tcphdr _tcph; memset(&ctx, 0, sizeof(ctx)); - tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); + tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); if (!tcp) return false; @@ -265,10 +266,11 @@ bool nf_osf_find(const struct sk_buff *skb, const struct nf_osf_finger *kf; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; + struct tcphdr _tcph; memset(&ctx, 0, sizeof(ctx)); - tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); + tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); if (!tcp) return false; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7b1a74f74aad..eccc7d366e17 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -64,6 +64,26 @@ static DEFINE_SPINLOCK(nr_list_lock); static const struct proto_ops nr_proto_ops; /* + * NETROM network devices are virtual network devices encapsulating NETROM + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key nr_netdev_xmit_lock_key; + +static void nr_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); +} + +static void nr_set_lockdep_key(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); +} + +/* * Socket removal during an interrupt is now safe. */ static void nr_remove_socket(struct sock *sk) @@ -1394,6 +1414,7 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } + nr_set_lockdep_key(dev); dev_nr[i] = dev; } diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig index 63f89cc6e82c..f362ca316015 100644 --- a/net/qrtr/Kconfig +++ b/net/qrtr/Kconfig @@ -4,7 +4,6 @@ config QRTR tristate "Qualcomm IPC Router support" - depends on ARCH_QCOM || COMPILE_TEST ---help--- Say Y if you intend to use Qualcomm IPC router protocol. The protocol is used to communicate with services provided by other @@ -29,4 +28,11 @@ config QRTR_TUN implement endpoints of QRTR, for purpose of tunneling data to other hosts or testing purposes. +config QRTR_MHI + tristate "MHI IPC Router channels" + depends on MHI_BUS + help + Say Y here to support MHI based ipcrouter channels. MHI is the + transport used for communicating to external modems. + endif # QRTR diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile index 32d4e923925d..1b1411d158a7 100644 --- a/net/qrtr/Makefile +++ b/net/qrtr/Makefile @@ -5,3 +5,5 @@ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o qrtr-smd-y := smd.o obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o qrtr-tun-y := tun.o +obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o +qrtr-mhi-y := mhi.o diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c new file mode 100644 index 000000000000..ff0c41467fc1 --- /dev/null +++ b/net/qrtr/mhi.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + */ + +#include <linux/mhi.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/sock.h> + +#include "qrtr.h" + +struct qrtr_mhi_dev { + struct qrtr_endpoint ep; + struct mhi_device *mhi_dev; + struct device *dev; +}; + +/* From MHI to QRTR */ +static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev); + int rc; + + if (!qdev || mhi_res->transaction_status) + return; + + rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr, + mhi_res->bytes_xferd); + if (rc == -EINVAL) + dev_err(qdev->dev, "invalid ipcrouter packet\n"); +} + +/* From QRTR to MHI */ +static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct sk_buff *skb = mhi_res->buf_addr; + + if (skb->sk) + sock_put(skb->sk); + consume_skb(skb); +} + +/* Send data over MHI */ +static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) +{ + struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); + int rc; + + rc = skb_linearize(skb); + if (rc) + goto free_skb; + + rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len, + MHI_EOT); + if (rc) + goto free_skb; + + if (skb->sk) + sock_hold(skb->sk); + + return rc; + +free_skb: + kfree_skb(skb); + + return rc; +} + +static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, + const struct mhi_device_id *id) +{ + struct qrtr_mhi_dev *qdev; + int rc; + + qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL); + if (!qdev) + return -ENOMEM; + + qdev->mhi_dev = mhi_dev; + qdev->dev = &mhi_dev->dev; + qdev->ep.xmit = qcom_mhi_qrtr_send; + + dev_set_drvdata(&mhi_dev->dev, qdev); + rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); + if (rc) + return rc; + + dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); + + return 0; +} + +static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev) +{ + struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev); + + qrtr_endpoint_unregister(&qdev->ep); + dev_set_drvdata(&mhi_dev->dev, NULL); +} + +static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = { + { .chan = "IPCR" }, + {} +}; +MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table); + +static struct mhi_driver qcom_mhi_qrtr_driver = { + .probe = qcom_mhi_qrtr_probe, + .remove = qcom_mhi_qrtr_remove, + .dl_xfer_cb = qcom_mhi_qrtr_dl_callback, + .ul_xfer_cb = qcom_mhi_qrtr_ul_callback, + .id_table = qcom_mhi_qrtr_id_table, + .driver = { + .name = "qcom_mhi_qrtr", + }, +}; + +module_mhi_driver(qcom_mhi_qrtr_driver); + +MODULE_AUTHOR("Chris Lew <clew@codeaurora.org>"); +MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>"); +MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver"); +MODULE_LICENSE("GPL v2"); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 1e8eeb044b07..e7a872207b46 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -65,6 +65,26 @@ static const struct proto_ops rose_proto_ops; ax25_address rose_callsign; /* + * ROSE network devices are virtual network devices encapsulating ROSE + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key rose_netdev_xmit_lock_key; + +static void rose_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); +} + +static void rose_set_lockdep_key(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); +} + +/* * Convert a ROSE address into text. */ char *rose2asc(char *buf, const rose_address *addr) @@ -1511,6 +1531,7 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } + rose_set_lockdep_key(dev); dev_rose[i] = dev; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7e85c91d0752..299b963c796e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2074,6 +2074,7 @@ replay: err = PTR_ERR(block); goto errout; } + block->classid = parent; chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { @@ -2616,12 +2617,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; parent = tcm->tcm_parent; - if (!parent) { + if (!parent) q = dev->qdisc; - parent = q->handle; - } else { + else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); - } if (!q) goto out; cops = q->ops->cl_ops; @@ -2637,6 +2636,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) block = cops->tcf_block(q, cl, NULL); if (!block) goto out; + parent = block->classid; if (tcf_block_shared(block)) q = NULL; } @@ -3548,6 +3548,16 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry, return 0; } +static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts) { @@ -3571,7 +3581,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = act->hw_stats; + entry->hw_stats = tc_act_hw_stats(act->hw_stats); if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; @@ -3639,7 +3649,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mangle.mask = tcf_pedit_mask(act, k); entry->mangle.val = tcf_pedit_val(act, k); entry->mangle.offset = tcf_pedit_offset(act, k); - entry->hw_stats = act->hw_stats; + entry->hw_stats = tc_act_hw_stats(act->hw_stats); entry = &flow_action->entries[++j]; } } else if (is_tcf_csum(act)) { diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index a36974e9c601..bd618b00d319 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -131,7 +131,6 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx, } struct choke_skb_cb { - u16 classid; u8 keys_valid; struct flow_keys_digest keys; }; @@ -142,11 +141,6 @@ static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; } -static inline void choke_set_classid(struct sk_buff *skb, u16 classid) -{ - choke_skb_cb(skb)->classid = classid; -} - /* * Compare flow of two packets * Returns true only if source and destination address and port match. @@ -323,7 +317,8 @@ static void choke_reset(struct Qdisc *sch) sch->q.qlen = 0; sch->qstats.backlog = 0; - memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + if (q->tab) + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 4f0104243cc2..8f06a808c59a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -100,6 +100,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + u64 ktime_cache; /* copy of last ktime_get_ns() */ unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ @@ -109,12 +110,13 @@ struct fq_sched_data { u32 flow_plimit; /* max packets per flow */ unsigned long flow_max_rate; /* optional max rate per flow */ u64 ce_threshold; + u64 horizon; /* horizon in ns */ u32 orphan_mask; /* mask for orphaned skb */ u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; - + u8 horizon_drop; u32 flows; u32 inactive_flows; u32 throttled_flows; @@ -123,6 +125,8 @@ struct fq_sched_data { u64 stat_internal_packets; u64 stat_throttled; u64 stat_ce_mark; + u64 stat_horizon_drops; + u64 stat_horizon_caps; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; @@ -402,8 +406,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) struct rb_node **p, *parent; struct sk_buff *head, *aux; - fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns(); - head = flow->head; if (!head || fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) { @@ -431,6 +433,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) rb_insert_color(&skb->rbnode, &flow->t_root); } +static bool fq_packet_beyond_horizon(const struct sk_buff *skb, + const struct fq_sched_data *q) +{ + return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); +} + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -440,6 +448,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); + if (!skb->tstamp) { + fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); + } else { + /* Check if packet timestamp is too far in the future. + * Try first if our cached value, to avoid ktime_get_ns() + * cost in most cases. + */ + if (fq_packet_beyond_horizon(skb, q)) { + /* Refresh our cache and check another time */ + q->ktime_cache = ktime_get_ns(); + if (fq_packet_beyond_horizon(skb, q)) { + if (q->horizon_drop) { + q->stat_horizon_drops++; + return qdisc_drop(skb, sch, to_free); + } + q->stat_horizon_caps++; + skb->tstamp = q->ktime_cache + q->horizon; + } + } + fq_skb_cb(skb)->time_to_send = skb->tstamp; + } + f = fq_classify(skb, q); if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { q->stat_flows_plimit++; @@ -512,7 +542,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) goto out; } - now = ktime_get_ns(); + q->ktime_cache = now = ktime_get_ns(); fq_check_throttled(q, now); begin: head = &q->new_flows; @@ -765,6 +795,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, + [TCA_FQ_HORIZON] = { .type = NLA_U32 }, + [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt, @@ -854,7 +886,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_TIMER_SLACK]) q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]); + if (tb[TCA_FQ_HORIZON]) + q->horizon = (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_HORIZON]); + + if (tb[TCA_FQ_HORIZON_DROP]) + q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]); + if (!err) { + sch_tree_unlock(sch); err = fq_resize(sch, fq_log); sch_tree_lock(sch); @@ -907,6 +947,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */ + q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */ + q->horizon_drop = 1; /* by default, drop packets beyond horizon */ + /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; @@ -924,6 +967,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); u64 ce_threshold = q->ce_threshold; + u64 horizon = q->horizon; struct nlattr *opts; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -933,6 +977,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ do_div(ce_threshold, NSEC_PER_USEC); + do_div(horizon, NSEC_PER_USEC); if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || @@ -948,7 +993,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) || - nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack)) + nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) || + nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) || + nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) goto nla_put_failure; return nla_nest_end(skb, opts); @@ -979,6 +1026,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.unthrottle_latency_ns = min_t(unsigned long, q->unthrottle_latency_ns, ~0U); st.ce_mark = q->stat_ce_mark; + st.horizon_drops = q->stat_horizon_drops; + st.horizon_caps = q->stat_horizon_caps; sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 968519ff36e9..436160be9c18 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -416,7 +416,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) - q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); + q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); if (tb[TCA_FQ_CODEL_MEMORY_LIMIT]) q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2efd5b61acef..ebc55d884247 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -794,6 +794,9 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { }; EXPORT_SYMBOL(pfifo_fast_ops); +static struct lock_class_key qdisc_tx_busylock; +static struct lock_class_key qdisc_running_key; + struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack) @@ -846,9 +849,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, } spin_lock_init(&sch->busylock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + seqcount_init(&sch->running); + lockdep_set_class(&sch->running, + dev->qdisc_running_key ?: &qdisc_running_key); sch->ops = ops; sch->flags = ops->static_flags; @@ -859,12 +870,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, dev_hold(dev); refcount_set(&sch->refcnt, 1); - if (sch != &noop_qdisc) { - lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key); - lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key); - lockdep_set_class(&sch->running, &dev->qdisc_running_key); - } - return sch; errout1: kfree(p); @@ -1037,10 +1042,9 @@ static void attach_one_default_qdisc(struct net_device *dev, ops = &pfifo_fast_ops; qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL); - if (!qdisc) { - netdev_info(dev, "activation failed\n"); + if (!qdisc) return; - } + if (!netif_is_multiqueue(dev)) qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; dev_queue->qdisc_sleeping = qdisc; @@ -1065,6 +1069,18 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } + + /* Detect default qdisc setup/init failed and fallback to "noqueue" */ + if (dev->qdisc == &noop_qdisc) { + netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", + default_qdisc_ops->id, noqueue_qdisc_ops.id); + dev->priv_flags |= IFF_NO_QUEUE; + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); + dev->qdisc = txq->qdisc_sleeping; + qdisc_refcount_inc(dev->qdisc); + dev->priv_flags ^= IFF_NO_QUEUE; + } + #ifdef CONFIG_NET_SCHED if (dev->qdisc != &noop_qdisc) qdisc_hash_add(dev->qdisc, false); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c787d4d46017..5a6def5e4e6d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -637,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; + + /* slot->allot is a short, make sure quantum is not too big. */ + if (ctl->quantum) { + unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); + + if (scaled <= 0 || scaled > SHRT_MAX) + return -EINVAL; + } + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog)) return -EINVAL; diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 0fb10abf7579..7a5e4c454715 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt, { struct tc_skbprio_qopt *ctl = nla_data(opt); + if (opt->nla_len != nla_attr_size(sizeof(*ctl))) + return -EINVAL; + sch->limit = ctl->limit; return 0; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c67272007f41..903321543838 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -378,8 +378,6 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) struct smc_llc_qentry *qentry; int rc; - link->lgr->type = SMC_LGR_SINGLE; - /* receive CONFIRM LINK request from server over RoCE fabric */ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, SMC_LLC_CONFIRM_LINK); @@ -390,6 +388,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } + smc_llc_save_peer_uid(qentry); rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ); smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); if (rc) @@ -413,6 +412,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) return SMC_CLC_DECL_TIMEOUT_CL; smc_llc_link_active(link); + smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); /* optional 2nd link, receive ADD LINK request from server */ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, @@ -1036,8 +1036,6 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) struct smc_llc_qentry *qentry; int rc; - link->lgr->type = SMC_LGR_SINGLE; - if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_ERR_REGRMB; @@ -1056,6 +1054,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } + smc_llc_save_peer_uid(qentry); rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP); smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); if (rc) @@ -1065,6 +1064,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) smc->conn.rmb_desc->is_conf_rkey = true; smc_llc_link_active(link); + smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); /* initial contact - try to establish second link */ smc_llc_srv_add_link(link); diff --git a/net/smc/smc.h b/net/smc/smc.h index 1a084afa7372..6f1c42da7a4c 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -143,6 +143,9 @@ struct smc_connection { * .prod cf. TCP snd_nxt * .cons cf. TCP sends ack */ + union smc_host_cursor local_tx_ctrl_fin; + /* prod crsr - confirmed by peer + */ union smc_host_cursor tx_curs_prep; /* tx - prepared data * snd_max..wmem_alloc */ @@ -154,6 +157,7 @@ struct smc_connection { */ atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ + u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ @@ -184,12 +188,14 @@ struct smc_connection { spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ + struct work_struct abort_work; /* abort the connection */ struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ u8 rx_off; /* receive offset: * 0 for SMC-R, 32 for SMC-D */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ + u8 out_of_sync : 1; /* out of sync with peer */ }; struct smc_sock { /* smc sock container */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index f64589d823aa..b2b85e1be72c 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -47,17 +47,20 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ smp_mb__after_atomic(); smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn); + smc_curs_copy(&conn->local_tx_ctrl_fin, &cdcpend->p_cursor, + conn); + conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_link *link, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { - struct smc_link *link = conn->lnk; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, @@ -104,22 +107,64 @@ int smc_cdc_msg_send(struct smc_connection *conn, if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } else { + conn->tx_cdc_seq--; + conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; } return rc; } +/* send a validation msg indicating the move of a conn to an other QP link */ +int smcr_cdc_msg_send_validation(struct smc_connection *conn) +{ + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + struct smc_link *link = conn->lnk; + struct smc_cdc_tx_pend *pend; + struct smc_wr_buf *wr_buf; + struct smc_cdc_msg *peer; + int rc; + + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); + if (rc) + return rc; + + peer = (struct smc_cdc_msg *)wr_buf; + peer->common.type = local->common.type; + peer->len = local->len; + peer->seqno = htons(conn->tx_cdc_seq_fin); /* seqno last compl. tx */ + peer->token = htonl(local->token); + peer->prod_flags.failover_validation = 1; + + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + return rc; +} + static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) { struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; + struct smc_link *link; + bool again = false; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); +again: + link = conn->lnk; + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); if (rc) return rc; spin_lock_bh(&conn->send_lock); + if (link != conn->lnk) { + /* link of connection changed, try again one time*/ + spin_unlock_bh(&conn->send_lock); + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); + if (again) + return -ENOLINK; + again = true; + goto again; + } rc = smc_cdc_msg_send(conn, wr_buf, pend); spin_unlock_bh(&conn->send_lock); return rc; @@ -237,6 +282,28 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc, sk_send_sigurg(&smc->sk); } +static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc, + struct smc_link *link) +{ + struct smc_connection *conn = &smc->conn; + u16 recv_seq = ntohs(cdc->seqno); + s16 diff; + + /* check that seqnum was seen before */ + diff = conn->local_rx_ctrl.seqno - recv_seq; + if (diff < 0) { /* diff larger than 0x7fff */ + /* drop connection */ + conn->out_of_sync = 1; /* prevent any further receives */ + spin_lock_bh(&conn->send_lock); + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + conn->lnk = link; + spin_unlock_bh(&conn->send_lock); + sock_hold(&smc->sk); /* sock_put in abort_work */ + if (!schedule_work(&conn->abort_work)) + sock_put(&smc->sk); + } +} + static void smc_cdc_msg_recv_action(struct smc_sock *smc, struct smc_cdc_msg *cdc) { @@ -367,16 +434,19 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) read_lock_bh(&lgr->conns_lock); conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); read_unlock_bh(&lgr->conns_lock); - if (!conn) + if (!conn || conn->out_of_sync) return; smc = container_of(conn, struct smc_sock, conn); - if (!cdc->prod_flags.failover_validation) { - if (smc_cdc_before(ntohs(cdc->seqno), - conn->local_rx_ctrl.seqno)) - /* received seqno is old */ - return; + if (cdc->prod_flags.failover_validation) { + smc_cdc_msg_validate(smc, cdc, link); + return; } + if (smc_cdc_before(ntohs(cdc->seqno), + conn->local_rx_ctrl.seqno)) + /* received seqno is old */ + return; + smc_cdc_msg_recv(smc, cdc); } diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 861dc24c588c..2ddcc5fb5ceb 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -97,23 +97,6 @@ static inline void smc_curs_add(int size, union smc_host_cursor *curs, } } -/* SMC cursors are 8 bytes long and require atomic reading and writing */ -static inline u64 smc_curs_read(union smc_host_cursor *curs, - struct smc_connection *conn) -{ -#ifndef KERNEL_HAS_ATOMIC64 - unsigned long flags; - u64 ret; - - spin_lock_irqsave(&conn->acurs_lock, flags); - ret = curs->acurs; - spin_unlock_irqrestore(&conn->acurs_lock, flags); - return ret; -#else - return atomic64_read(&curs->acurs); -#endif -} - /* Copy cursor src into tgt */ static inline void smc_curs_copy(union smc_host_cursor *tgt, union smc_host_cursor *src, @@ -304,6 +287,7 @@ struct smc_cdc_tx_pend { }; int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_link *link, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); @@ -312,6 +296,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); int smcd_cdc_msg_send(struct smc_connection *conn); +int smcr_cdc_msg_send_validation(struct smc_connection *conn); int smc_cdc_init(void) __init; void smcd_cdc_rx_init(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 32a6cadc5c1f..65de700e1f17 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -121,16 +121,60 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn) rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); } +/* assign an SMC-R link to the connection */ +static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) +{ + enum smc_link_state expected = first ? SMC_LNK_ACTIVATING : + SMC_LNK_ACTIVE; + int i, j; + + /* do link balancing */ + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &conn->lgr->lnk[i]; + + if (lnk->state != expected || lnk->link_is_asym) + continue; + if (conn->lgr->role == SMC_CLNT) { + conn->lnk = lnk; /* temporary, SMC server assigns link*/ + break; + } + if (conn->lgr->conns_num % 2) { + for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { + struct smc_link *lnk2; + + lnk2 = &conn->lgr->lnk[j]; + if (lnk2->state == expected && + !lnk2->link_is_asym) { + conn->lnk = lnk2; + break; + } + } + } + if (!conn->lnk) + conn->lnk = lnk; + break; + } + if (!conn->lnk) + return SMC_CLC_DECL_NOACTLINK; + return 0; +} + /* Register connection in link group by assigning an alert token * registered in a search tree. * Requires @conns_lock * Note that '0' is a reserved value and not assigned. */ -static int smc_lgr_register_conn(struct smc_connection *conn) +static int smc_lgr_register_conn(struct smc_connection *conn, bool first) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); static atomic_t nexttoken = ATOMIC_INIT(0); + int rc; + if (!conn->lgr->is_smcd) { + rc = smcr_lgr_conn_assign_link(conn, first); + if (rc) + return rc; + } /* find a new alert_token_local value not yet used by some connection * in this link group */ @@ -141,22 +185,6 @@ static int smc_lgr_register_conn(struct smc_connection *conn) conn->alert_token_local = 0; } smc_lgr_add_alert_token(conn); - - /* assign the new connection to a link */ - if (!conn->lgr->is_smcd) { - struct smc_link *lnk; - int i; - - /* tbd - link balancing */ - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - lnk = &conn->lgr->lnk[i]; - if (lnk->state == SMC_LNK_ACTIVATING || - lnk->state == SMC_LNK_ACTIVE) - conn->lnk = lnk; - } - if (!conn->lnk) - return SMC_CLC_DECL_NOACTLINK; - } conn->lgr->conns_num++; return 0; } @@ -209,6 +237,19 @@ void smc_lgr_cleanup_early(struct smc_connection *conn) smc_lgr_schedule_free_work_fast(lgr); } +static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) +{ + int i; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + + if (smc_link_usable(lnk)) + lnk->state = SMC_LNK_INACTIVE; + } + wake_up_interruptible_all(&lgr->llc_waiter); +} + static void smc_lgr_free(struct smc_link_group *lgr); static void smc_lgr_free_work(struct work_struct *work) @@ -218,7 +259,6 @@ static void smc_lgr_free_work(struct work_struct *work) free_work); spinlock_t *lgr_lock; bool conns; - int i; smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); @@ -238,17 +278,13 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(lgr_lock); cancel_delayed_work(&lgr->free_work); + if (!lgr->is_smcd && !lgr->terminating) + smc_llc_send_link_delete_all(lgr, true, + SMC_LLC_DEL_PROG_INIT_TERM); if (lgr->is_smcd && !lgr->terminating) smc_ism_signal_shutdown(lgr); - if (!lgr->is_smcd) { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - struct smc_link *lnk = &lgr->lnk[i]; - - if (smc_link_usable(lnk)) - lnk->state = SMC_LNK_INACTIVE; - } - wake_up_interruptible_all(&lgr->llc_waiter); - } + if (!lgr->is_smcd) + smcr_lgr_link_deactivate_all(lgr); smc_lgr_free(lgr); } @@ -295,6 +331,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); if (!ini->ib_dev->initialized) { rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); @@ -332,7 +369,7 @@ dealloc_pd: free_link_mem: smc_wr_free_link_mem(lnk); clear_llc_lnk: - smc_llc_link_clear(lnk); + smc_llc_link_clear(lnk, false); out: put_device(&ini->ib_dev->ibdev->dev); memset(lnk, 0, sizeof(struct smc_link)); @@ -432,6 +469,135 @@ out: return rc; } +static int smc_write_space(struct smc_connection *conn) +{ + int buffer_len = conn->peer_rmbe_size; + union smc_host_cursor prod; + union smc_host_cursor cons; + int space; + + smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); + smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); + /* determine rx_buf space */ + space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod); + return space; +} + +static int smc_switch_cursor(struct smc_sock *smc) +{ + struct smc_connection *conn = &smc->conn; + union smc_host_cursor cons, fin; + int rc = 0; + int diff; + + smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn); + smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn); + /* set prod cursor to old state, enforce tx_rdma_writes() */ + smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn); + smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); + + if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) { + /* cons cursor advanced more than fin, and prod was set + * fin above, so now prod is smaller than cons. Fix that. + */ + diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons); + smc_curs_add(conn->sndbuf_desc->len, + &conn->tx_curs_sent, diff); + smc_curs_add(conn->sndbuf_desc->len, + &conn->tx_curs_fin, diff); + + smp_mb__before_atomic(); + atomic_add(diff, &conn->sndbuf_space); + smp_mb__after_atomic(); + + smc_curs_add(conn->peer_rmbe_size, + &conn->local_tx_ctrl.prod, diff); + smc_curs_add(conn->peer_rmbe_size, + &conn->local_tx_ctrl_fin, diff); + } + /* recalculate, value is used by tx_rdma_writes() */ + atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn)); + + if (smc->sk.sk_state != SMC_INIT && + smc->sk.sk_state != SMC_CLOSED) { + rc = smcr_cdc_msg_send_validation(conn); + if (!rc) { + schedule_delayed_work(&conn->tx_work, 0); + smc->sk.sk_data_ready(&smc->sk); + } + } + return rc; +} + +struct smc_link *smc_switch_conns(struct smc_link_group *lgr, + struct smc_link *from_lnk, bool is_dev_err) +{ + struct smc_link *to_lnk = NULL; + struct smc_connection *conn; + struct smc_sock *smc; + struct rb_node *node; + int i, rc = 0; + + /* link is inactive, wake up tx waiters */ + smc_wr_wakeup_tx_wait(from_lnk); + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_ACTIVE || + i == from_lnk->link_idx) + continue; + if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && + from_lnk->ibport == lgr->lnk[i].ibport) { + continue; + } + to_lnk = &lgr->lnk[i]; + break; + } + if (!to_lnk) { + smc_lgr_terminate_sched(lgr); + return NULL; + } +again: + read_lock_bh(&lgr->conns_lock); + for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) { + conn = rb_entry(node, struct smc_connection, alert_node); + if (conn->lnk != from_lnk) + continue; + smc = container_of(conn, struct smc_sock, conn); + /* conn->lnk not yet set in SMC_INIT state */ + if (smc->sk.sk_state == SMC_INIT) + continue; + if (smc->sk.sk_state == SMC_CLOSED || + smc->sk.sk_state == SMC_PEERCLOSEWAIT1 || + smc->sk.sk_state == SMC_PEERCLOSEWAIT2 || + smc->sk.sk_state == SMC_APPFINCLOSEWAIT || + smc->sk.sk_state == SMC_APPCLOSEWAIT1 || + smc->sk.sk_state == SMC_APPCLOSEWAIT2 || + smc->sk.sk_state == SMC_PEERFINCLOSEWAIT || + smc->sk.sk_state == SMC_PEERABORTWAIT || + smc->sk.sk_state == SMC_PROCESSABORT) { + spin_lock_bh(&conn->send_lock); + conn->lnk = to_lnk; + spin_unlock_bh(&conn->send_lock); + continue; + } + sock_hold(&smc->sk); + read_unlock_bh(&lgr->conns_lock); + /* avoid race with smcr_tx_sndbuf_nonempty() */ + spin_lock_bh(&conn->send_lock); + conn->lnk = to_lnk; + rc = smc_switch_cursor(smc); + spin_unlock_bh(&conn->send_lock); + sock_put(&smc->sk); + if (rc) { + smcr_link_down_cond_sched(to_lnk); + return NULL; + } + goto again; + } + read_unlock_bh(&lgr->conns_lock); + return to_lnk; +} + static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, struct smc_link_group *lgr) { @@ -486,6 +652,8 @@ void smc_conn_free(struct smc_connection *conn) tasklet_kill(&conn->rx_tsklet); } else { smc_cdc_tx_dismiss_slots(conn); + if (current_work() != &conn->abort_work) + cancel_work_sync(&conn->abort_work); } if (!list_empty(&lgr->list)) { smc_lgr_unregister_conn(conn); @@ -550,14 +718,14 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) } /* must be called under lgr->llc_conf_mutex lock */ -void smcr_link_clear(struct smc_link *lnk) +void smcr_link_clear(struct smc_link *lnk, bool log) { struct smc_ib_device *smcibdev; if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) return; lnk->peer_qpn = 0; - smc_llc_link_clear(lnk); + smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); smcr_rtoken_clear_link(lnk); smc_ib_modify_qp_reset(lnk); @@ -640,6 +808,16 @@ static void smc_lgr_free(struct smc_link_group *lgr) { int i; + if (!lgr->is_smcd) { + mutex_lock(&lgr->llc_conf_mutex); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_UNUSED) + smcr_link_clear(&lgr->lnk[i], false); + } + mutex_unlock(&lgr->llc_conf_mutex); + smc_llc_lgr_clear(lgr); + } + smc_lgr_free_bufs(lgr); if (lgr->is_smcd) { if (!lgr->terminating) { @@ -649,11 +827,6 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_UNUSED) - smcr_link_clear(&lgr->lnk[i]); - } - smc_llc_lgr_clear(lgr); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } @@ -708,21 +881,18 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) static void smc_lgr_cleanup(struct smc_link_group *lgr) { - int i; - if (lgr->is_smcd) { smc_ism_signal_shutdown(lgr); smcd_unregister_all_dmbs(lgr); smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); } else { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - struct smc_link *lnk = &lgr->lnk[i]; + u32 rsn = lgr->llc_termination_rsn; - if (smc_link_usable(lnk)) - lnk->state = SMC_LNK_INACTIVE; - } - wake_up_interruptible_all(&lgr->llc_waiter); + if (!rsn) + rsn = SMC_LLC_DEL_PROG_INIT_TERM; + smc_llc_send_link_delete_all(lgr, false, rsn); + smcr_lgr_link_deactivate_all(lgr); } } @@ -738,8 +908,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) if (lgr->terminating) return; /* lgr already terminating */ - if (!soft) - cancel_delayed_work_sync(&lgr->free_work); + /* cancel free_work sync, will terminate when lgr->freeing is set */ + cancel_delayed_work_sync(&lgr->free_work); lgr->terminating = 1; /* kill remaining link group connections */ @@ -759,10 +929,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) } read_unlock_bh(&lgr->conns_lock); smc_lgr_cleanup(lgr); - if (soft) - smc_lgr_schedule_free_work_fast(lgr); - else - smc_lgr_free(lgr); + smc_lgr_free(lgr); } /* unlink link group and schedule termination */ @@ -777,6 +944,7 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr) return; /* lgr already terminating */ } list_del_init(&lgr->list); + lgr->freeing = 1; spin_unlock_bh(lgr_lock); schedule_work(&lgr->terminate_work); } @@ -795,6 +963,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) if (peer_gid) /* peer triggered termination */ lgr->peer_shutdown = 1; list_move(&lgr->list, &lgr_free_list); + lgr->freeing = 1; } } spin_unlock_bh(&dev->lgr_lock); @@ -854,6 +1023,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { list_del_init(&lgr->list); + smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM); __smc_lgr_terminate(lgr, false); } @@ -867,6 +1037,61 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) } } +/* set new lgr type and clear all asymmetric link tagging */ +void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) +{ + char *lgr_type = ""; + int i; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) + if (smc_link_usable(&lgr->lnk[i])) + lgr->lnk[i].link_is_asym = false; + if (lgr->type == new_type) + return; + lgr->type = new_type; + + switch (lgr->type) { + case SMC_LGR_NONE: + lgr_type = "NONE"; + break; + case SMC_LGR_SINGLE: + lgr_type = "SINGLE"; + break; + case SMC_LGR_SYMMETRIC: + lgr_type = "SYMMETRIC"; + break; + case SMC_LGR_ASYMMETRIC_PEER: + lgr_type = "ASYMMETRIC_PEER"; + break; + case SMC_LGR_ASYMMETRIC_LOCAL: + lgr_type = "ASYMMETRIC_LOCAL"; + break; + } + pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: " + "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id, + lgr_type, lgr->pnet_id); +} + +/* set new lgr type and tag a link as asymmetric */ +void smcr_lgr_set_type_asym(struct smc_link_group *lgr, + enum smc_lgr_type new_type, int asym_lnk_idx) +{ + smcr_lgr_set_type(lgr, new_type); + lgr->lnk[asym_lnk_idx].link_is_asym = true; +} + +/* abort connection, abort_work scheduled from tasklet context */ +static void smc_conn_abort_work(struct work_struct *work) +{ + struct smc_connection *conn = container_of(work, + struct smc_connection, + abort_work); + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + + smc_conn_kill(conn, true); + sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ +} + /* link is up - establish alternate link if applicable */ static void smcr_link_up(struct smc_link_group *lgr, struct smc_ib_device *smcibdev, u8 ibport) @@ -943,13 +1168,12 @@ static void smcr_link_down(struct smc_link *lnk) return; smc_ib_modify_qp_reset(lnk); - to_lnk = NULL; - /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */ + to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ - smcr_link_clear(lnk); + smcr_link_clear(lnk, true); return; } - lgr->type = SMC_LGR_SINGLE; + smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); del_link_id = lnk->link_id; if (lgr->role == SMC_SERV) { @@ -1138,7 +1362,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; - rc = smc_lgr_register_conn(conn); /* add conn to lgr */ + rc = smc_lgr_register_conn(conn, false); write_unlock_bh(&lgr->conns_lock); if (!rc && delayed_work_pending(&lgr->free_work)) cancel_delayed_work(&lgr->free_work); @@ -1166,7 +1390,7 @@ create: goto out; lgr = conn->lgr; write_lock_bh(&lgr->conns_lock); - rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */ + rc = smc_lgr_register_conn(conn, true); write_unlock_bh(&lgr->conns_lock); if (rc) goto out; @@ -1174,6 +1398,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); smcd_cdc_rx_init(conn); /* init tasklet for this conn */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 7fe53feb9dc4..86d160f0d187 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -70,6 +70,8 @@ struct smc_rdma_wr { /* work requests per message struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; }; +#define SMC_LGR_ID_SIZE 4 + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -85,6 +87,7 @@ struct smc_link { struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ + struct completion *wr_tx_compl; /* WR send CQE completion */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ atomic_long_t wr_tx_id; /* seq # of last sent WR */ @@ -115,7 +118,10 @@ struct smc_link { u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ + u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */ + u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ u8 link_idx; /* index in lgr link array */ + u8 link_is_asym; /* is link asymmetric? */ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ @@ -176,7 +182,6 @@ struct smc_rtoken { /* address/key of remote RMB */ u32 rkey; }; -#define SMC_LGR_ID_SIZE 4 #define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */ #define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */ /* theoretically, the RFC states that largest size would be 512K, @@ -269,6 +274,8 @@ struct smc_link_group { /* protects llc flow */ int llc_testlink_time; /* link keep alive time */ + u32 llc_termination_rsn; + /* rsn code for termination */ }; struct { /* SMC-D */ u64 peer_gid; @@ -376,10 +383,15 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); -void smcr_link_clear(struct smc_link *lnk); +void smcr_link_clear(struct smc_link *lnk, bool log); int smcr_buf_map_lgr(struct smc_link *lnk); int smcr_buf_reg_lgr(struct smc_link *lnk); +void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type); +void smcr_lgr_set_type_asym(struct smc_link_group *lgr, + enum smc_lgr_type new_type, int asym_lnk_idx); int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); +struct smc_link *smc_switch_conns(struct smc_link_group *lgr, + struct smc_link *from_lnk, bool is_dev_err); void smcr_link_down_cond(struct smc_link *lnk); void smcr_link_down_cond_sched(struct smc_link *lnk); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 2c743caad69a..f0a5064bf9bd 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -575,6 +575,8 @@ static void smc_ib_add_dev(struct ib_device *ibdev) /* trigger reading of the port attributes */ port_cnt = smcibdev->ibdev->phys_port_cnt; + pr_warn_ratelimited("smc: adding ib device %s with port count %d\n", + smcibdev->ibdev->name, port_cnt); for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { @@ -583,6 +585,13 @@ static void smc_ib_add_dev(struct ib_device *ibdev) if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, smcibdev->pnetid[i])) smc_pnetid_by_table_ib(smcibdev, i + 1); + pr_warn_ratelimited("smc: ib device %s port %d has pnetid " + "%.16s%s\n", + smcibdev->ibdev->name, i + 1, + smcibdev->pnetid[i], + smcibdev->pnetid_by_user[i] ? + " (user defined)" : + ""); } schedule_work(&smcibdev->port_event_work); } @@ -599,6 +608,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ spin_unlock(&smc_ib_devices.lock); + pr_warn_ratelimited("smc: removing ib device %s\n", + smcibdev->ibdev->name); smc_smcr_terminate_all(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 32be2da2cb85..91f85fc09fb8 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -321,12 +321,18 @@ int smcd_register_dev(struct smcd_dev *smcd) list_add_tail(&smcd->list, &smcd_dev_list.list); spin_unlock(&smcd_dev_list.lock); + pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", + dev_name(&smcd->dev), smcd->pnetid, + smcd->pnetid_by_user ? " (user defined)" : ""); + return device_add(&smcd->dev); } EXPORT_SYMBOL_GPL(smcd_register_dev); void smcd_unregister_dev(struct smcd_dev *smcd) { + pr_warn_ratelimited("smc: removing smcd device %s\n", + dev_name(&smcd->dev)); spin_lock(&smcd_dev_list.lock); list_del_init(&smcd->list); spin_unlock(&smcd_dev_list.lock); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 7675ccd6f3c3..391237b601fe 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -361,7 +361,6 @@ static int smc_llc_add_pending_send(struct smc_link *link, int smc_llc_send_confirm_link(struct smc_link *link, enum smc_llc_reqresp reqresp) { - struct smc_link_group *lgr = smc_get_lgr(link); struct smc_llc_msg_confirm_link *confllc; struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; @@ -382,7 +381,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; - memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); + memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE); confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* send llc message */ rc = smc_wr_tx_send(link, pend); @@ -560,6 +559,25 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf) return smc_wr_tx_send(link, pend); } +/* schedule an llc send on link, may wait for buffers, + * and wait for send completion notification. + * @return 0 on success + */ +static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf) +{ + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + if (!smc_link_usable(link)) + return -ENOLINK; + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); + return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); +} + /********************************* receive ***********************************/ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, @@ -732,7 +750,6 @@ static int smc_llc_cli_conf_link(struct smc_link *link, enum smc_lgr_type lgr_new_t) { struct smc_link_group *lgr = link->lgr; - struct smc_llc_msg_del_link *del_llc; struct smc_llc_qentry *qentry = NULL; int rc = 0; @@ -746,12 +763,12 @@ static int smc_llc_cli_conf_link(struct smc_link *link, } if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { /* received DELETE_LINK instead */ - del_llc = &qentry->msg.delete_link; qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, &qentry->msg); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return -ENOLINK; } + smc_llc_save_peer_uid(qentry); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); rc = smc_ib_modify_qp_rts(link_new); @@ -777,7 +794,11 @@ static int smc_llc_cli_conf_link(struct smc_link *link, return -ENOLINK; } smc_llc_link_active(link_new); - lgr->type = lgr_new_t; + if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || + lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) + smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx); + else + smcr_lgr_set_type(lgr, lgr_new_t); return 0; } @@ -822,7 +843,8 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) if (rc) goto out_reject; smc_llc_save_add_link_info(lnk_new, llc); - lnk_new->link_id = llc->link_num; + lnk_new->link_id = llc->link_num; /* SMC server assigns link id */ + smc_llc_link_set_uid(lnk_new); rc = smc_ib_ready_link(lnk_new); if (rc) @@ -846,7 +868,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) if (!rc) goto out; out_clear_lnk: - smcr_link_clear(lnk_new); + smcr_link_clear(lnk_new, false); out_reject: smc_llc_cli_add_link_reject(qentry); out: @@ -933,7 +955,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr) return; /* no asymmetric link */ if (!smc_link_downing(&lnk_asym->state)) return; - /* tbd: lnk_new = smc_switch_conns(lgr, lnk_asym, false); */ + lnk_new = smc_switch_conns(lgr, lnk_asym, false); smc_wr_tx_wait_no_pending_sends(lnk_asym); if (!lnk_new) goto out_free; @@ -953,7 +975,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr) } smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); out_free: - smcr_link_clear(lnk_asym); + smcr_link_clear(lnk_asym, true); } static int smc_llc_srv_rkey_exchange(struct smc_link *link, @@ -1018,8 +1040,13 @@ static int smc_llc_srv_conf_link(struct smc_link *link, false, SMC_LLC_DEL_LOST_PATH); return -ENOLINK; } + smc_llc_save_peer_uid(qentry); smc_llc_link_active(link_new); - lgr->type = lgr_new_t; + if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || + lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) + smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx); + else + smcr_lgr_set_type(lgr, lgr_new_t); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return 0; } @@ -1092,7 +1119,7 @@ int smc_llc_srv_add_link(struct smc_link *link) goto out_err; return 0; out_err: - smcr_link_clear(link_new); + smcr_link_clear(link_new, false); return rc; } @@ -1195,18 +1222,18 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) smc_llc_send_message(lnk, &qentry->msg); /* response */ if (smc_link_downing(&lnk_del->state)) { - /* tbd: call smc_switch_conns(lgr, lnk_del, false); */ + smc_switch_conns(lgr, lnk_del, false); smc_wr_tx_wait_no_pending_sends(lnk_del); } - smcr_link_clear(lnk_del); + smcr_link_clear(lnk_del, true); active_links = smc_llc_active_link_count(lgr); if (lnk_del == lnk_asym) { /* expected deletion of asym link, don't change lgr state */ } else if (active_links == 1) { - lgr->type = SMC_LGR_SINGLE; + smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); } else if (!active_links) { - lgr->type = SMC_LGR_NONE; + smcr_lgr_set_type(lgr, SMC_LGR_NONE); smc_lgr_terminate_sched(lgr); } out_unlock: @@ -1215,6 +1242,29 @@ out: kfree(qentry); } +/* try to send a DELETE LINK ALL request on any active link, + * waiting for send completion + */ +void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) +{ + struct smc_llc_msg_del_link delllc = {0}; + int i; + + delllc.hd.common.type = SMC_LLC_DELETE_LINK; + delllc.hd.length = sizeof(delllc); + if (ord) + delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; + delllc.reason = htonl(rsn); + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (!smc_link_usable(&lgr->lnk[i])) + continue; + if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) + break; + } +} + static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) { struct smc_llc_msg_del_link *del_llc; @@ -1230,6 +1280,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) { /* delete entire lgr */ + smc_llc_send_link_delete_all(lgr, true, ntohl( + qentry->msg.delete_link.reason)); smc_lgr_terminate_sched(lgr); goto out; } @@ -1245,7 +1297,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) goto out; /* asymmetric link already deleted */ if (smc_link_downing(&lnk_del->state)) { - /* tbd: call smc_switch_conns(lgr, lnk_del, false); */ + smc_switch_conns(lgr, lnk_del, false); smc_wr_tx_wait_no_pending_sends(lnk_del); } if (!list_empty(&lgr->list)) { @@ -1254,25 +1306,21 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) * enqueued DELETE_LINK request (forward it) */ if (!smc_llc_send_message(lnk, &qentry->msg)) { - struct smc_llc_msg_del_link *del_llc_resp; struct smc_llc_qentry *qentry2; qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME, SMC_LLC_DELETE_LINK); - if (!qentry2) { - } else { - del_llc_resp = &qentry2->msg.delete_link; + if (qentry2) smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); - } } } - smcr_link_clear(lnk_del); + smcr_link_clear(lnk_del, true); active_links = smc_llc_active_link_count(lgr); if (active_links == 1) { - lgr->type = SMC_LGR_SINGLE; + smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); } else if (!active_links) { - lgr->type = SMC_LGR_NONE; + smcr_lgr_set_type(lgr, SMC_LGR_NONE); smc_lgr_terminate_sched(lgr); } @@ -1368,6 +1416,14 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); } +static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type) +{ + pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: " + "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type); + smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL); + smc_lgr_terminate_sched(lgr); +} + /* flush the llc event queue */ static void smc_llc_event_flush(struct smc_link_group *lgr) { @@ -1468,6 +1524,9 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; + default: + smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); + break; } out: kfree(qentry); @@ -1527,6 +1586,9 @@ static void smc_llc_rx_response(struct smc_link *link, case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; + default: + smc_llc_protocol_violation(link->lgr, llc_type); + break; } kfree(qentry); } @@ -1643,6 +1705,12 @@ int smc_llc_link_init(struct smc_link *link) void smc_llc_link_active(struct smc_link *link) { + pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, " + "peerid %*phN, ibdev %s, ibport %d\n", + SMC_LGR_ID_SIZE, &link->lgr->id, + SMC_LGR_ID_SIZE, &link->link_uid, + SMC_LGR_ID_SIZE, &link->peer_link_uid, + link->smcibdev->ibdev->name, link->ibport); link->state = SMC_LNK_ACTIVE; if (link->lgr->llc_testlink_time) { link->llc_testlink_time = link->lgr->llc_testlink_time * HZ; @@ -1652,8 +1720,15 @@ void smc_llc_link_active(struct smc_link *link) } /* called in worker context */ -void smc_llc_link_clear(struct smc_link *link) +void smc_llc_link_clear(struct smc_link *link, bool log) { + if (log) + pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN" + ", peerid %*phN, ibdev %s, ibport %d\n", + SMC_LGR_ID_SIZE, &link->lgr->id, + SMC_LGR_ID_SIZE, &link->link_uid, + SMC_LGR_ID_SIZE, &link->peer_link_uid, + link->smcibdev->ibdev->name, link->ibport); complete(&link->llc_testlink_resp); cancel_delayed_work_sync(&link->llc_testlink_wrk); smc_wr_wakeup_reg_wait(link); @@ -1709,12 +1784,29 @@ out: return rc; } +void smc_llc_link_set_uid(struct smc_link *link) +{ + __be32 link_uid; + + link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id); + memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE); +} + +/* save peers link user id, used for debug purposes */ +void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry) +{ + memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid, + SMC_LGR_ID_SIZE); +} + /* evaluate confirm link request or response */ int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, enum smc_llc_reqresp type) { - if (type == SMC_LLC_REQ) /* SMC server assigns link_id */ + if (type == SMC_LLC_REQ) { /* SMC server assigns link_id */ qentry->link->link_id = qentry->msg.confirm_link.link_num; + smc_llc_link_set_uid(qentry->link); + } if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)) return -ENOTSUPP; return 0; diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index c335fc5f363c..a5d2fe3eea61 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -60,6 +60,14 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr) return NULL; } +/* set the termination reason code for the link group */ +static inline void smc_llc_set_termination_rsn(struct smc_link_group *lgr, + u32 rsn) +{ + if (!lgr->llc_termination_rsn) + lgr->llc_termination_rsn = rsn; +} + /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, enum smc_llc_reqresp reqresp); @@ -74,7 +82,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc); void smc_llc_lgr_clear(struct smc_link_group *lgr); int smc_llc_link_init(struct smc_link *link); void smc_llc_link_active(struct smc_link *link); -void smc_llc_link_clear(struct smc_link *link); +void smc_llc_link_clear(struct smc_link *link, bool log); int smc_llc_do_confirm_rkey(struct smc_link *send_link, struct smc_buf_desc *rmb_desc); int smc_llc_do_delete_rkey(struct smc_link_group *lgr, @@ -84,11 +92,15 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr, void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow); int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, enum smc_llc_reqresp type); +void smc_llc_link_set_uid(struct smc_link *link); +void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry); struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, struct smc_link *lnk, int time_out, u8 exp_msg); struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow); void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); +void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, + u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); int smc_llc_srv_add_link(struct smc_link *link); void smc_llc_srv_add_link_local(struct smc_link *link); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 50c96e843fab..be03f1260d59 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -110,8 +110,14 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) if (!pnet_name || smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); - if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) + if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { dev_put(pnetelem->ndev); + pr_warn_ratelimited("smc: net device %s " + "erased user defined " + "pnetid %.16s\n", + pnetelem->eth_name, + pnetelem->pnet_name); + } kfree(pnetelem); rc = 0; } @@ -130,6 +136,12 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) (!pnet_name || smc_pnet_match(pnet_name, ibdev->pnetid[ibport]))) { + pr_warn_ratelimited("smc: ib device %s ibport " + "%d erased user defined " + "pnetid %.16s\n", + ibdev->ibdev->name, + ibport + 1, + ibdev->pnetid[ibport]); memset(ibdev->pnetid[ibport], 0, SMC_MAX_PNETID_LEN); ibdev->pnetid_by_user[ibport] = false; @@ -144,6 +156,10 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) if (smcd_dev->pnetid_by_user && (!pnet_name || smc_pnet_match(pnet_name, smcd_dev->pnetid))) { + pr_warn_ratelimited("smc: smcd device %s " + "erased user defined pnetid " + "%.16s\n", dev_name(&smcd_dev->dev), + smcd_dev->pnetid); memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = false; rc = 0; @@ -174,6 +190,10 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) dev_hold(ndev); pnetelem->ndev = ndev; rc = 0; + pr_warn_ratelimited("smc: adding net device %s with " + "user defined pnetid %.16s\n", + pnetelem->eth_name, + pnetelem->pnet_name); break; } } @@ -201,6 +221,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) dev_put(pnetelem->ndev); pnetelem->ndev = NULL; rc = 0; + pr_warn_ratelimited("smc: removing net device %s with " + "user defined pnetid %.16s\n", + pnetelem->eth_name, + pnetelem->pnet_name); break; } } @@ -357,6 +381,10 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, kfree(new_pe); goto out_put; } + if (ndev) + pr_warn_ratelimited("smc: net device %s " + "applied user defined pnetid %.16s\n", + new_pe->eth_name, new_pe->pnet_name); return 0; out_put: @@ -377,11 +405,24 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, /* try to apply the pnetid to active devices */ ib_dev = smc_pnet_find_ib(ib_name); - if (ib_dev) + if (ib_dev) { ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); + if (ibdev_applied) + pr_warn_ratelimited("smc: ib device %s ibport %d " + "applied user defined pnetid " + "%.16s\n", ib_dev->ibdev->name, + ib_port, + ib_dev->pnetid[ib_port - 1]); + } smcd_dev = smc_pnet_find_smcd(ib_name); - if (smcd_dev) + if (smcd_dev) { smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name); + if (smcddev_applied) + pr_warn_ratelimited("smc: smcd device %s " + "applied user defined pnetid " + "%.16s\n", dev_name(&smcd_dev->dev), + smcd_dev->pnetid); + } /* Apply fails when a device has a hardware-defined pnetid set, do not * add a pnet table entry in that case. */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 417204572a69..54ba0443847e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -482,12 +482,13 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; + struct smc_link *link = conn->lnk; struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend); + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = @@ -505,10 +506,17 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) } spin_lock_bh(&conn->send_lock); + if (link != conn->lnk) { + /* link of connection changed, tx_work will restart */ + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); + rc = -ENOLINK; + goto out_unlock; + } if (!pflags->urg_data_present) { rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { - smc_wr_tx_put_slot(conn->lnk, + smc_wr_tx_put_slot(link, (struct smc_wr_tx_pend_priv *)pend); goto out_unlock; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 3fd27bea4f7a..7239ba9b99dc 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -44,6 +44,7 @@ struct smc_wr_tx_pend { /* control data for a pending send request */ struct smc_link *link; u32 idx; struct smc_wr_tx_pend_priv priv; + u8 compl_requested; }; /******************************** send queue *********************************/ @@ -103,6 +104,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) if (pnd_snd_idx == link->wr_tx_cnt) return; link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; + if (link->wr_tx_pends[pnd_snd_idx].compl_requested) + complete(&link->wr_tx_compl[pnd_snd_idx]); memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); /* clear the full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[pnd_snd_idx], 0, @@ -275,6 +278,33 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +/* Send prepared WR slot via ib_post_send and wait for send completion + * notification. + * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer + */ +int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + unsigned long timeout) +{ + struct smc_wr_tx_pend *pend; + int rc; + + pend = container_of(priv, struct smc_wr_tx_pend, priv); + pend->compl_requested = 1; + init_completion(&link->wr_tx_compl[pend->idx]); + + rc = smc_wr_tx_send(link, priv); + if (rc) + return rc; + /* wait for completion by smc_wr_tx_process_cqe() */ + rc = wait_for_completion_interruptible_timeout( + &link->wr_tx_compl[pend->idx], timeout); + if (rc <= 0) + rc = -ENODATA; + if (rc > 0) + rc = 0; + return rc; +} + /* Register a memory region and wait for result. */ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) { @@ -555,6 +585,8 @@ void smc_wr_free_link(struct smc_link *lnk) void smc_wr_free_link_mem(struct smc_link *lnk) { + kfree(lnk->wr_tx_compl); + lnk->wr_tx_compl = NULL; kfree(lnk->wr_tx_pends); lnk->wr_tx_pends = NULL; kfree(lnk->wr_tx_mask); @@ -625,8 +657,15 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_pends) goto no_mem_wr_tx_mask; + link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_compl[0]), + GFP_KERNEL); + if (!link->wr_tx_compl) + goto no_mem_wr_tx_pends; return 0; +no_mem_wr_tx_pends: + kfree(link->wr_tx_pends); no_mem_wr_tx_mask: kfree(link->wr_tx_mask); no_mem_wr_rx_sges: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f7eaeb3391f3..423b8709f1c9 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -101,6 +101,8 @@ int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); +int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, diff --git a/net/socket.c b/net/socket.c index 2dd739fba866..1c9a7260a41d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -924,14 +924,9 @@ EXPORT_SYMBOL(sock_recvmsg); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { - mm_segment_t oldfs = get_fs(); - int result; - + msg->msg_control_is_user = false; iov_iter_kvec(&msg->msg_iter, READ, vec, num, size); - set_fs(KERNEL_DS); - result = sock_recvmsg(sock, msg, flags); - set_fs(oldfs); - return result; + return sock_recvmsg(sock, msg, flags); } EXPORT_SYMBOL(kernel_recvmsg); @@ -2239,7 +2234,8 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(&msg, umsg, sizeof(*umsg))) return -EFAULT; - kmsg->msg_control = (void __force *)msg.msg_control; + kmsg->msg_control_is_user = true; + kmsg->msg_control_user = msg.msg_control; kmsg->msg_controllen = msg.msg_controllen; kmsg->msg_flags = msg.msg_flags; @@ -2331,16 +2327,10 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, goto out; } err = -EFAULT; - /* - * Careful! Before this, msg_sys->msg_control contains a user pointer. - * Afterwards, it will be a kernel pointer. Thus the compiler-assisted - * checking falls down on this. - */ - if (copy_from_user(ctl_buf, - (void __user __force *)msg_sys->msg_control, - ctl_len)) + if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len)) goto out_freectl; msg_sys->msg_control = ctl_buf; + msg_sys->msg_control_is_user = false; } msg_sys->msg_flags = flags; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 325a0858700f..8350d3a2e9a7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -880,6 +880,20 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); /* * Free an RPC client */ +static void rpc_free_client_work(struct work_struct *work) +{ + struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work); + + /* These might block on processes that might allocate memory, + * so they cannot be called in rpciod, so they are handled separately + * here. + */ + rpc_clnt_debugfs_unregister(clnt); + rpc_clnt_remove_pipedir(clnt); + + kfree(clnt); + rpciod_down(); +} static struct rpc_clnt * rpc_free_client(struct rpc_clnt *clnt) { @@ -890,17 +904,16 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; - rpc_clnt_debugfs_unregister(clnt); - rpc_clnt_remove_pipedir(clnt); rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); - rpciod_down(); put_cred(clnt->cl_cred); rpc_free_clid(clnt); - kfree(clnt); + + INIT_WORK(&clnt->cl_work, rpc_free_client_work); + schedule_work(&clnt->cl_work); return parent; } @@ -2808,8 +2821,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, &rpc_cb_add_xprt_call_ops, data); - if (IS_ERR(task)) - return PTR_ERR(task); + rpc_put_task(task); success: return 1; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 4a81e6995d3e..3c627dc685cc 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -388,7 +388,9 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, } while (nsegs); done: - return xdr_stream_encode_item_absent(xdr); + if (xdr_stream_encode_item_absent(xdr) < 0) + return -EMSGSIZE; + return 0; } /* Register and XDR encode the Write list. Supports encoding a list @@ -454,7 +456,9 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, *segcount = cpu_to_be32(nchunks); done: - return xdr_stream_encode_item_absent(xdr); + if (xdr_stream_encode_item_absent(xdr) < 0) + return -EMSGSIZE; + return 0; } /* Register and XDR encode the Reply chunk. Supports encoding an array @@ -480,8 +484,11 @@ static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, int nsegs, nchunks; __be32 *segcount; - if (wtype != rpcrdma_replych) - return xdr_stream_encode_item_absent(xdr); + if (wtype != rpcrdma_replych) { + if (xdr_stream_encode_item_absent(xdr) < 0) + return -EMSGSIZE; + return 0; + } seg = req->rl_segments; nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cdd84c09df10..05c4d3a9cda2 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -289,6 +289,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: ep->re_connect_status = -ECONNABORTED; disconnected: + xprt_force_disconnect(xprt); return rpcrdma_ep_destroy(ep); default: break; @@ -1355,8 +1356,8 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) --ep->re_send_count; } + trace_xprtrdma_post_send(req); rc = frwr_send(r_xprt, req); - trace_xprtrdma_post_send(req, rc); if (rc) return -ENOTCONN; return 0; diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 3a12fc18239b..73dbed0c4b6b 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -402,10 +402,11 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) read_lock_bh(&sk->sk_callback_lock); ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); + if (!ret) + return 0; } - if (ret < 0) - tipc_conn_close(con); + tipc_conn_close(con); return ret; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c98e602a1a2d..e23f94a5549b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -800,6 +800,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); } + if (psock) + sk_psock_put(sk, psock); return err; } more_data: @@ -2081,8 +2083,9 @@ static void tls_data_ready(struct sock *sk) strp_data_ready(&ctx->strp); psock = sk_psock_get(sk); - if (psock && !list_empty(&psock->ingress_msg)) { - ctx->saved_data_ready(sk); + if (psock) { + if (!list_empty(&psock->ingress_msg)) + ctx->saved_data_ready(sk); sk_psock_put(sk, psock); } } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 709038a4783e..69efc891885f 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -157,7 +157,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) { + if (pkt->tap_delivered) + return; + vsock_deliver_tap(virtio_transport_build_skb, pkt); + pkt->tap_delivered = true; } EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 8aa415a38814..0285aaa1e93c 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -357,6 +357,12 @@ void x25_disconnect(struct sock *sk, int reason, unsigned char cause, sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } + if (x25->neighbour) { + read_lock_bh(&x25_list_lock); + x25_neigh_put(x25->neighbour); + x25->neighbour = NULL; + read_unlock_bh(&x25_list_lock); + } } /* |