diff options
206 files changed, 6449 insertions, 4825 deletions
diff --git a/Documentation/devicetree/bindings/net/imx-dwmac.txt b/Documentation/devicetree/bindings/net/imx-dwmac.txt new file mode 100644 index 000000000000..921d522fe8d7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/imx-dwmac.txt @@ -0,0 +1,56 @@ +IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP. + +This file documents platform glue layer for IMX. +Please see stmmac.txt for the other unchanged properties. + +The device node has following properties. + +Required properties: +- compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer + and "snps,dwmac-5.10a" to select IP version. +- clocks: Must contain a phandle for each entry in clock-names. +- clock-names: Should be "stmmaceth" for the host clock. + Should be "pclk" for the MAC apb clock. + Should be "ptp_ref" for the MAC timer clock. + Should be "tx" for the MAC RGMII TX clock: + Should be "mem" for EQOS MEM clock. + - "mem" clock is required for imx8dxl platform. + - "mem" clock is not required for imx8mp platform. +- interrupt-names: Should contain a list of interrupt names corresponding to + the interrupts in the interrupts property, if available. + Should be "macirq" for the main MAC IRQ + Should be "eth_wake_irq" for the IT which wake up system +- intf_mode: Should be phandle/offset pair. The phandle to the syscon node which + encompases the GPR register, and the offset of the GPR register. + - required for imx8mp platform. + - is optional for imx8dxl platform. + +Optional properties: +- intf_mode: is optional for imx8dxl platform. +- snps,rmii_refclk_ext: to select RMII reference clock from external. + +Example: + eqos: ethernet@30bf0000 { + compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; + reg = <0x30bf0000 0x10000>; + interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "eth_wake_irq", "macirq"; + clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, + <&clk IMX8MP_CLK_QOS_ENET_ROOT>, + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, + <&clk IMX8MP_CLK_ENET_QOS>; + clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; + assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, + <&clk IMX8MP_CLK_ENET_QOS>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, + <&clk IMX8MP_SYS_PLL2_100M>, + <&clk IMX8MP_SYS_PLL2_125M>; + assigned-clock-rates = <0>, <100000000>, <125000000>; + nvmem-cells = <ð_mac0>; + nvmem-cell-names = "mac-address"; + nvmem_macaddr_swap; + intf_mode = <&gpr 0x4>; + status = "disabled"; + }; diff --git a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml index f85d91a9d6e5..aea88e621792 100644 --- a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml +++ b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/net/mediatek,eth-mac.yaml# +$id: http://devicetree.org/schemas/net/mediatek,star-emac.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: MediaTek STAR Ethernet MAC Controller diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst index 5ad35113d0f4..68552b92dc44 100644 --- a/Documentation/networking/rxrpc.rst +++ b/Documentation/networking/rxrpc.rst @@ -477,7 +477,7 @@ AF_RXRPC sockets support a few socket options at the SOL_RXRPC level: Encrypted checksum plus packet padded and first eight bytes of packet encrypted - which includes the actual packet length. - (c) RXRPC_SECURITY_ENCRYPTED + (c) RXRPC_SECURITY_ENCRYPT Encrypted checksum plus entire packet padded and encrypted, including actual packet length. @@ -578,7 +578,7 @@ A client would issue an operation by: This issues a request_key() to get the key representing the security context. The minimum security level can be set:: - unsigned int sec = RXRPC_SECURITY_ENCRYPTED; + unsigned int sec = RXRPC_SECURITY_ENCRYPT; setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, &sec, sizeof(sec)); @@ -1090,6 +1090,15 @@ The kernel interface functions are as follows: jiffies). In the event of the timeout occurring, the call will be aborted and -ETIME or -ETIMEDOUT will be returned. + (#) Apply the RXRPC_MIN_SECURITY_LEVEL sockopt to a socket from within in the + kernel:: + + int rxrpc_sock_set_min_security_level(struct sock *sk, + unsigned int val); + + This specifies the minimum security level required for calls on this + socket. + Configurable Parameters ======================= diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index aae99a2d7bd4..14345a87c7cc 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1570,34 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); -static inline void drbd_tcp_cork(struct socket *sock) -{ - int val = 1; - (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK, - (char*)&val, sizeof(val)); -} - -static inline void drbd_tcp_uncork(struct socket *sock) -{ - int val = 0; - (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK, - (char*)&val, sizeof(val)); -} - -static inline void drbd_tcp_nodelay(struct socket *sock) -{ - int val = 1; - (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char*)&val, sizeof(val)); -} - -static inline void drbd_tcp_quickack(struct socket *sock) -{ - int val = 2; - (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, - (char*)&val, sizeof(val)); -} - /* sets the number of 512 byte sectors of our virtual device */ void drbd_set_my_capacity(struct drbd_device *device, sector_t size); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c094c3c2c5d4..45fbd526c453 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection *connection, int vnr, /* DRBD protocol "pings" are latency critical. * This is supposed to trigger tcp_push_pending_frames() */ if (!err && (cmd == P_PING || cmd == P_PING_ACK)) - drbd_tcp_nodelay(sock->socket); + tcp_sock_set_nodelay(sock->socket->sk); return err; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c15e7083b13a..3a3f2b6a821f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1051,8 +1051,8 @@ randomize: /* we don't want delays. * we use TCP_CORK where appropriate, though */ - drbd_tcp_nodelay(sock.socket); - drbd_tcp_nodelay(msock.socket); + tcp_sock_set_nodelay(sock.socket->sk); + tcp_sock_set_nodelay(msock.socket->sk); connection->data.socket = sock.socket; connection->meta.socket = msock.socket; @@ -1223,7 +1223,7 @@ static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, str * quickly as possible, and let remote TCP know what we have * received so far. */ if (err == -EAGAIN) { - drbd_tcp_quickack(connection->data.socket); + tcp_sock_set_quickack(connection->data.socket->sk, 2); drbd_unplug_all_devices(connection); } if (err > 0) { @@ -4959,8 +4959,7 @@ static int receive_UnplugRemote(struct drbd_connection *connection, struct packe { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(connection->data.socket); - + tcp_sock_set_quickack(connection->data.socket->sk, 2); return 0; } @@ -6162,7 +6161,7 @@ void drbd_send_acks_wf(struct work_struct *ws) rcu_read_unlock(); if (tcp_cork) - drbd_tcp_cork(connection->meta.socket); + tcp_sock_set_cork(connection->meta.socket->sk, true); err = drbd_finish_peer_reqs(device); kref_put(&device->kref, drbd_destroy_device); @@ -6175,7 +6174,7 @@ void drbd_send_acks_wf(struct work_struct *ws) } if (tcp_cork) - drbd_tcp_uncork(connection->meta.socket); + tcp_sock_set_cork(connection->meta.socket->sk, false); return; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0dc019da1f8d..2b89c9f2ca70 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -2098,7 +2098,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * if (uncork) { mutex_lock(&connection->data.mutex); if (connection->data.socket) - drbd_tcp_uncork(connection->data.socket); + tcp_sock_set_cork(connection->data.socket->sk, false); mutex_unlock(&connection->data.mutex); } @@ -2153,9 +2153,9 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * mutex_lock(&connection->data.mutex); if (connection->data.socket) { if (cork) - drbd_tcp_cork(connection->data.socket); + tcp_sock_set_cork(connection->data.socket->sk, true); else if (!uncork) - drbd_tcp_uncork(connection->data.socket); + tcp_sock_set_cork(connection->data.socket->sk, false); } mutex_unlock(&connection->data.mutex); } diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 69cb7e6e8955..3a0601c2052c 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -404,7 +404,10 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) { return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions); + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + max_modify_header_actions); } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 65e0e24d463b..566b42f3fb18 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3698,12 +3698,13 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!dest_num) rule_dst = NULL; } else { + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + flow_act.action |= + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - else - flow_act.action |= - dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + else if (dest_num) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && @@ -3747,30 +3748,6 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); } -static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_dst = NULL; - struct mlx5_ib_flow_handler *handler = NULL; - - handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); - if (!IS_ERR(handler)) { - handler_dst = create_flow_rule(dev, ft_prio, - flow_attr, dst); - if (IS_ERR(handler_dst)) { - mlx5_del_flow_rules(handler->rule); - ft_prio->refcount--; - kfree(handler); - handler = handler_dst; - } else { - list_add(&handler_dst->list, &handler->list); - } - } - - return handler; -} enum { LEFTOVERS_MC, LEFTOVERS_UC, @@ -3974,15 +3951,11 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, } if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { - handler = create_dont_trap_rule(dev, ft_prio, - flow_attr, dst); - } else { - underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? - mqp->underlay_qpn : 0; - handler = _create_flow_rule(dev, ft_prio, flow_attr, - dst, underlay_qpn, ucmd); - } + underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? + mqp->underlay_qpn : + 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, + underlay_qpn, ucmd); } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 559e5fd3bad8..1662216be66d 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - if (siw_tcp_nagle == false) { - int val = 1; - - rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - if (rv) { - siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv); - goto error; - } - } + if (siw_tcp_nagle == false) + tcp_sock_set_nodelay(new_s->sk); new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT); @@ -1312,17 +1304,14 @@ static void siw_cm_llp_state_change(struct sock *sk) static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, struct sockaddr *raddr) { - int rv, flags = 0, s_val = 1; + int rv, flags = 0; size_t size = laddr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); /* * Make address available again asap. */ - rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, - sizeof(s_val)); - if (rv < 0) - return rv; + sock_set_reuseaddr(s->sk); rv = s->ops->bind(s, laddr, size); if (rv < 0) @@ -1389,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv); goto error; } - if (siw_tcp_nagle == false) { - int val = 1; - - rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val, - sizeof(val)); - if (rv) { - siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv); - goto error; - } - } + if (siw_tcp_nagle == false) + tcp_sock_set_nodelay(s->sk); cep = siw_cep_alloc(sdev); if (!cep) { rv = -ENOMEM; @@ -1781,7 +1762,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct siw_cep *cep = NULL; struct siw_device *sdev = to_siw_dev(id->device); int addr_family = id->local_addr.ss_family; - int rv = 0, s_val; + int rv = 0; if (addr_family != AF_INET && addr_family != AF_INET6) return -EAFNOSUPPORT; @@ -1793,13 +1774,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) /* * Allow binding local port when still in TIME_WAIT from last close. */ - s_val = 1; - rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, - sizeof(s_val)); - if (rv) { - siw_dbg(id->device, "setsockopt error: %d\n", rv); - goto error; - } + sock_set_reuseaddr(s->sk); + if (addr_family == AF_INET) { struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 198d2a7d7f95..29ed21687295 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -84,6 +84,7 @@ struct sja1105_info { * the egress timestamps. */ int ptpegr_ts_bytes; + int num_cbs_shapers; const struct sja1105_dynamic_table_ops *dyn_ops; const struct sja1105_table_ops *static_ops; const struct sja1105_regs *regs; @@ -218,6 +219,7 @@ struct sja1105_private { struct mutex mgmt_lock; bool expect_dsa_8021q; enum sja1105_vlan_state vlan_state; + struct sja1105_cbs_entry *cbs; struct sja1105_tagger_data tagger_data; struct sja1105_ptp_data ptp_data; struct sja1105_tas_data tas_data; @@ -321,24 +323,6 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); -/* Common implementations for the static and dynamic configs */ -size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); -size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr, - enum packing_op op); - /* From sja1105_flower.c */ int sja1105_cls_flower_del(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 2a8fbd7fdedc..4471eeccc293 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -127,17 +127,29 @@ #define SJA1105ET_SIZE_L2_LOOKUP_PARAMS_DYN_CMD \ SJA1105_SIZE_DYN_CMD +#define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_DYN_CMD \ + (SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY) + #define SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD \ SJA1105_SIZE_DYN_CMD +#define SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD \ + (SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY) + #define SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD \ (SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY) #define SJA1105_SIZE_RETAGGING_DYN_CMD \ (SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_RETAGGING_ENTRY) +#define SJA1105ET_SIZE_CBS_DYN_CMD \ + (SJA1105_SIZE_DYN_CMD + SJA1105ET_SIZE_CBS_ENTRY) + +#define SJA1105PQRS_SIZE_CBS_DYN_CMD \ + (SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_CBS_ENTRY) + #define SJA1105_MAX_DYN_CMD_SIZE \ - SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD + SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD struct sja1105_dyn_cmd { bool search; @@ -495,6 +507,18 @@ sja1105et_l2_lookup_params_entry_packing(void *buf, void *entry_ptr, } static void +sja1105pqrs_l2_lookup_params_cmd_packing(void *buf, + struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + u8 *p = buf + SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY; + const int size = SJA1105_SIZE_DYN_CMD; + + sja1105_packing(p, &cmd->valid, 31, 31, size, op); + sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op); +} + +static void sja1105et_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { @@ -517,6 +541,18 @@ sja1105et_general_params_entry_packing(void *buf, void *entry_ptr, } static void +sja1105pqrs_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + u8 *p = buf + SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY; + const int size = SJA1105_SIZE_DYN_CMD; + + sja1105_packing(p, &cmd->valid, 31, 31, size, op); + sja1105_packing(p, &cmd->errors, 30, 30, size, op); + sja1105_packing(p, &cmd->rdwrset, 28, 28, size, op); +} + +static void sja1105pqrs_avb_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { @@ -542,6 +578,60 @@ sja1105_retagging_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, sja1105_packing(p, &cmd->index, 5, 0, size, op); } +static void sja1105et_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + u8 *p = buf + SJA1105ET_SIZE_CBS_ENTRY; + const int size = SJA1105_SIZE_DYN_CMD; + + sja1105_packing(p, &cmd->valid, 31, 31, size, op); + sja1105_packing(p, &cmd->index, 19, 16, size, op); +} + +static size_t sja1105et_cbs_entry_packing(void *buf, void *entry_ptr, + enum packing_op op) +{ + const size_t size = SJA1105ET_SIZE_CBS_ENTRY; + struct sja1105_cbs_entry *entry = entry_ptr; + u8 *cmd = buf + size; + u32 *p = buf; + + sja1105_packing(cmd, &entry->port, 5, 3, SJA1105_SIZE_DYN_CMD, op); + sja1105_packing(cmd, &entry->prio, 2, 0, SJA1105_SIZE_DYN_CMD, op); + sja1105_packing(p + 3, &entry->credit_lo, 31, 0, size, op); + sja1105_packing(p + 2, &entry->credit_hi, 31, 0, size, op); + sja1105_packing(p + 1, &entry->send_slope, 31, 0, size, op); + sja1105_packing(p + 0, &entry->idle_slope, 31, 0, size, op); + return size; +} + +static void sja1105pqrs_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + u8 *p = buf + SJA1105PQRS_SIZE_CBS_ENTRY; + const int size = SJA1105_SIZE_DYN_CMD; + + sja1105_packing(p, &cmd->valid, 31, 31, size, op); + sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op); + sja1105_packing(p, &cmd->errors, 29, 29, size, op); + sja1105_packing(p, &cmd->index, 3, 0, size, op); +} + +static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr, + enum packing_op op) +{ + const size_t size = SJA1105PQRS_SIZE_CBS_ENTRY; + struct sja1105_cbs_entry *entry = entry_ptr; + + sja1105_packing(buf, &entry->port, 159, 157, size, op); + sja1105_packing(buf, &entry->prio, 156, 154, size, op); + sja1105_packing(buf, &entry->credit_lo, 153, 122, size, op); + sja1105_packing(buf, &entry->credit_hi, 121, 90, size, op); + sja1105_packing(buf, &entry->send_slope, 89, 58, size, op); + sja1105_packing(buf, &entry->idle_slope, 57, 26, size, op); + return size; +} + #define OP_READ BIT(0) #define OP_WRITE BIT(1) #define OP_DEL BIT(2) @@ -631,6 +721,14 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = { .packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD, .addr = 0x31, }, + [BLK_IDX_CBS] = { + .entry_packing = sja1105et_cbs_entry_packing, + .cmd_packing = sja1105et_cbs_cmd_packing, + .max_entry_count = SJA1105ET_MAX_CBS_COUNT, + .access = OP_WRITE, + .packed_size = SJA1105ET_SIZE_CBS_DYN_CMD, + .addr = 0x2c, + }, [BLK_IDX_XMII_PARAMS] = {0}, }; @@ -693,12 +791,12 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = { [BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0}, [BLK_IDX_VL_FORWARDING_PARAMS] = {0}, [BLK_IDX_L2_LOOKUP_PARAMS] = { - .entry_packing = sja1105et_l2_lookup_params_entry_packing, - .cmd_packing = sja1105et_l2_lookup_params_cmd_packing, + .entry_packing = sja1105pqrs_l2_lookup_params_entry_packing, + .cmd_packing = sja1105pqrs_l2_lookup_params_cmd_packing, .max_entry_count = SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT, .access = (OP_READ | OP_WRITE), - .packed_size = SJA1105ET_SIZE_L2_LOOKUP_PARAMS_DYN_CMD, - .addr = 0x38, + .packed_size = SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_DYN_CMD, + .addr = 0x54, }, [BLK_IDX_L2_FORWARDING_PARAMS] = {0}, [BLK_IDX_AVB_PARAMS] = { @@ -710,12 +808,12 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = { .addr = 0x8003, }, [BLK_IDX_GENERAL_PARAMS] = { - .entry_packing = sja1105et_general_params_entry_packing, - .cmd_packing = sja1105et_general_params_cmd_packing, + .entry_packing = sja1105pqrs_general_params_entry_packing, + .cmd_packing = sja1105pqrs_general_params_cmd_packing, .max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT, - .access = OP_WRITE, - .packed_size = SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD, - .addr = 0x34, + .access = (OP_READ | OP_WRITE), + .packed_size = SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD, + .addr = 0x3B, }, [BLK_IDX_RETAGGING] = { .entry_packing = sja1105_retagging_entry_packing, @@ -725,6 +823,14 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = { .packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD, .addr = 0x38, }, + [BLK_IDX_CBS] = { + .entry_packing = sja1105pqrs_cbs_entry_packing, + .cmd_packing = sja1105pqrs_cbs_cmd_packing, + .max_entry_count = SJA1105PQRS_MAX_CBS_COUNT, + .access = OP_WRITE, + .packed_size = SJA1105PQRS_SIZE_CBS_DYN_CMD, + .addr = 0x32, + }, [BLK_IDX_XMII_PARAMS] = {0}, }; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 44ce7882dfb1..789b288cc78b 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1640,6 +1640,92 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port, sja1105_bridge_member(ds, port, br, false); } +#define BYTES_PER_KBIT (1000LL / 8) + +static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv) +{ + int i; + + for (i = 0; i < priv->info->num_cbs_shapers; i++) + if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope) + return i; + + return -1; +} + +static int sja1105_delete_cbs_shaper(struct sja1105_private *priv, int port, + int prio) +{ + int i; + + for (i = 0; i < priv->info->num_cbs_shapers; i++) { + struct sja1105_cbs_entry *cbs = &priv->cbs[i]; + + if (cbs->port == port && cbs->prio == prio) { + memset(cbs, 0, sizeof(*cbs)); + return sja1105_dynamic_config_write(priv, BLK_IDX_CBS, + i, cbs, true); + } + } + + return 0; +} + +static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port, + struct tc_cbs_qopt_offload *offload) +{ + struct sja1105_private *priv = ds->priv; + struct sja1105_cbs_entry *cbs; + int index; + + if (!offload->enable) + return sja1105_delete_cbs_shaper(priv, port, offload->queue); + + index = sja1105_find_unused_cbs_shaper(priv); + if (index < 0) + return -ENOSPC; + + cbs = &priv->cbs[index]; + cbs->port = port; + cbs->prio = offload->queue; + /* locredit and sendslope are negative by definition. In hardware, + * positive values must be provided, and the negative sign is implicit. + */ + cbs->credit_hi = offload->hicredit; + cbs->credit_lo = abs(offload->locredit); + /* User space is in kbits/sec, hardware in bytes/sec */ + cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT; + cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT); + /* Convert the negative values from 64-bit 2's complement + * to 32-bit 2's complement (for the case of 0x80000000 whose + * negative is still negative). + */ + cbs->credit_lo &= GENMASK_ULL(31, 0); + cbs->send_slope &= GENMASK_ULL(31, 0); + + return sja1105_dynamic_config_write(priv, BLK_IDX_CBS, index, cbs, + true); +} + +static int sja1105_reload_cbs(struct sja1105_private *priv) +{ + int rc = 0, i; + + for (i = 0; i < priv->info->num_cbs_shapers; i++) { + struct sja1105_cbs_entry *cbs = &priv->cbs[i]; + + if (!cbs->idle_slope && !cbs->send_slope) + continue; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_CBS, i, cbs, + true); + if (rc) + break; + } + + return rc; +} + static const char * const sja1105_reset_reasons[] = { [SJA1105_VLAN_FILTERING] = "VLAN filtering", [SJA1105_RX_HWTSTAMPING] = "RX timestamping", @@ -1754,6 +1840,10 @@ out_unlock_ptp: sja1105_sgmii_pcs_force_speed(priv, speed); } } + + rc = sja1105_reload_cbs(priv); + if (rc < 0) + goto out; out: mutex_unlock(&priv->mgmt_lock); @@ -2656,6 +2746,10 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled) sja1105_frame_memory_partitioning(priv); + rc = sja1105_build_vlan_table(priv, false); + if (rc) + return rc; + rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING); if (rc) dev_err(ds->dev, "Failed to change VLAN Ethertype\n"); @@ -3131,6 +3225,8 @@ static int sja1105_port_setup_tc(struct dsa_switch *ds, int port, switch (type) { case TC_SETUP_QDISC_TAPRIO: return sja1105_setup_tc_taprio(ds, port, type_data); + case TC_SETUP_QDISC_CBS: + return sja1105_setup_tc_cbs(ds, port, type_data); default: return -EOPNOTSUPP; } @@ -3408,6 +3504,14 @@ static int sja1105_probe(struct spi_device *spi) if (rc) return rc; + if (IS_ENABLED(CONFIG_NET_SCH_CBS)) { + priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers, + sizeof(struct sja1105_cbs_entry), + GFP_KERNEL); + if (!priv->cbs) + return -ENOMEM; + } + /* Connections between dsa_port and sja1105_port */ for (port = 0; port < SJA1105_NUM_PORTS; port++) { struct sja1105_port *sp = &priv->ports[port]; diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index a0dacae803cc..bb52b9c841b2 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -515,6 +515,7 @@ struct sja1105_info sja1105e_info = { .qinq_tpid = ETH_P_8021Q, .ptp_ts_bits = 24, .ptpegr_ts_bytes = 4, + .num_cbs_shapers = SJA1105ET_MAX_CBS_COUNT, .reset_cmd = sja1105et_reset_cmd, .fdb_add_cmd = sja1105et_fdb_add, .fdb_del_cmd = sja1105et_fdb_del, @@ -530,6 +531,7 @@ struct sja1105_info sja1105t_info = { .qinq_tpid = ETH_P_8021Q, .ptp_ts_bits = 24, .ptpegr_ts_bytes = 4, + .num_cbs_shapers = SJA1105ET_MAX_CBS_COUNT, .reset_cmd = sja1105et_reset_cmd, .fdb_add_cmd = sja1105et_fdb_add, .fdb_del_cmd = sja1105et_fdb_del, @@ -545,6 +547,7 @@ struct sja1105_info sja1105p_info = { .qinq_tpid = ETH_P_8021AD, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, + .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT, .setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, @@ -561,6 +564,7 @@ struct sja1105_info sja1105q_info = { .qinq_tpid = ETH_P_8021AD, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, + .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT, .setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, @@ -577,6 +581,7 @@ struct sja1105_info sja1105r_info = { .qinq_tpid = ETH_P_8021AD, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, + .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT, .setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, @@ -594,6 +599,7 @@ struct sja1105_info sja1105s_info = { .qinq_tpid = ETH_P_8021AD, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, + .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT, .setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay, .reset_cmd = sja1105pqrs_reset_cmd, .fdb_add_cmd = sja1105pqrs_fdb_add, diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 780aca034cdc..ff3fe471efc2 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -146,9 +146,8 @@ static size_t sja1105et_general_params_entry_packing(void *buf, void *entry_ptr, /* TPID and TPID2 are intentionally reversed so that semantic * compatibility with E/T is kept. */ -static size_t -sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr, - enum packing_op op) +size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op) { const size_t size = SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY; struct sja1105_general_params_entry *entry = entry_ptr; @@ -228,9 +227,8 @@ sja1105et_l2_lookup_params_entry_packing(void *buf, void *entry_ptr, return size; } -static size_t -sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr, - enum packing_op op) +size_t sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op) { const size_t size = SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY; struct sja1105_l2_lookup_params_entry *entry = entry_ptr; diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h index 5946847bb5b9..ee0f10062763 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.h +++ b/drivers/net/dsa/sja1105/sja1105_static_config.h @@ -30,11 +30,13 @@ #define SJA1105ET_SIZE_L2_LOOKUP_PARAMS_ENTRY 4 #define SJA1105ET_SIZE_GENERAL_PARAMS_ENTRY 40 #define SJA1105ET_SIZE_AVB_PARAMS_ENTRY 12 +#define SJA1105ET_SIZE_CBS_ENTRY 16 #define SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY 20 #define SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY 32 #define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY 16 #define SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY 44 #define SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY 16 +#define SJA1105PQRS_SIZE_CBS_ENTRY 20 /* UM10944.pdf Page 11, Table 2. Configuration Blocks */ enum { @@ -56,6 +58,7 @@ enum { BLKID_AVB_PARAMS = 0x10, BLKID_GENERAL_PARAMS = 0x11, BLKID_RETAGGING = 0x12, + BLKID_CBS = 0x13, BLKID_XMII_PARAMS = 0x4E, }; @@ -78,6 +81,7 @@ enum sja1105_blk_idx { BLK_IDX_AVB_PARAMS, BLK_IDX_GENERAL_PARAMS, BLK_IDX_RETAGGING, + BLK_IDX_CBS, BLK_IDX_XMII_PARAMS, BLK_IDX_MAX, /* Fake block indices that are only valid for dynamic access */ @@ -105,6 +109,8 @@ enum sja1105_blk_idx { #define SJA1105_MAX_RETAGGING_COUNT 32 #define SJA1105_MAX_XMII_PARAMS_COUNT 1 #define SJA1105_MAX_AVB_PARAMS_COUNT 1 +#define SJA1105ET_MAX_CBS_COUNT 10 +#define SJA1105PQRS_MAX_CBS_COUNT 16 #define SJA1105_MAX_FRAME_MEMORY 929 #define SJA1105_MAX_FRAME_MEMORY_RETAGGING 910 @@ -289,6 +295,15 @@ struct sja1105_retagging_entry { u64 destports; }; +struct sja1105_cbs_entry { + u64 port; + u64 prio; + u64 credit_hi; + u64 credit_lo; + u64 send_slope; + u64 idle_slope; +}; + struct sja1105_xmii_params_entry { u64 phy_mac[5]; u64 xmii_mode[5]; @@ -415,4 +430,26 @@ void sja1105_unpack(const void *buf, u64 *val, int start, int end, size_t len); void sja1105_packing(void *buf, u64 *val, int start, int end, size_t len, enum packing_op op); +/* Common implementations for the static and dynamic configs */ +size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); +size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr, + enum packing_op op); + #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 7506cabaa16e..d041cac9a487 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -145,7 +145,6 @@ enum hnae3_reset_notify_type { HNAE3_DOWN_CLIENT, HNAE3_INIT_CLIENT, HNAE3_UNINIT_CLIENT, - HNAE3_RESTORE_CLIENT, }; enum hnae3_hw_error_type { @@ -622,16 +621,6 @@ struct hnae3_roce_private_info { unsigned long state; }; -struct hnae3_unic_private_info { - struct net_device *netdev; - u16 rx_buf_len; - u16 num_tx_desc; - u16 num_rx_desc; - - u16 num_tqps; /* total number of tqps in this handle */ - struct hnae3_queue **tqp; /* array base of all TQPs of this instance */ -}; - #define HNAE3_SUPPORT_APP_LOOPBACK BIT(0) #define HNAE3_SUPPORT_PHY_LOOPBACK BIT(1) #define HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK BIT(2) @@ -657,7 +646,6 @@ struct hnae3_handle { union { struct net_device *netdev; /* first member */ struct hnae3_knic_private_info kinfo; - struct hnae3_unic_private_info uinfo; struct hnae3_roce_private_info rinfo; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9fe40c7773b4..b14f2abc2425 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1544,12 +1544,6 @@ static int hns3_nic_set_features(struct net_device *netdev, return ret; } - if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) && - h->ae_algo->ops->enable_vlan_filter) { - enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER); - h->ae_algo->ops->enable_vlan_filter(h, enable); - } - if ((changed & NETIF_F_HW_VLAN_CTAG_RX) && h->ae_algo->ops->enable_hw_strip_rxvtag) { enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 60f82ad89957..66cd4395f781 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -469,21 +469,8 @@ struct hns3_enet_tqp_vector { unsigned long last_jiffies; } ____cacheline_internodealigned_in_smp; -enum hns3_udp_tnl_type { - HNS3_UDP_TNL_VXLAN, - HNS3_UDP_TNL_GENEVE, - HNS3_UDP_TNL_MAX, -}; - -struct hns3_udp_tunnel { - u16 dst_port; - int used; -}; - struct hns3_nic_priv { struct hnae3_handle *ae_handle; - u32 enet_ver; - u32 port_id; struct net_device *netdev; struct device *dev; @@ -495,19 +482,10 @@ struct hns3_nic_priv { struct hns3_enet_tqp_vector *tqp_vector; u16 vector_num; - /* The most recently read link state */ - int link; u64 tx_timeout_count; unsigned long state; - struct timer_list service_timer; - - struct work_struct service_task; - - struct notifier_block notifier_block; - /* Vxlan/Geneve information */ - struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX]; struct hns3_enet_coalesce tx_coal; struct hns3_enet_coalesce rx_coal; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 64a1d0bdd7d1..1d6c328bd9fb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -11,8 +11,6 @@ #include "hnae3.h" #include "hclge_main.h" -#define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ) - #define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev) static int hclge_ring_space(struct hclge_cmq_ring *ring) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index e3bab8f3847f..463f29151ef0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -884,8 +884,8 @@ struct hclge_cfg_tso_status_cmd { #define HCLGE_GRO_EN_B 0 struct hclge_cfg_gro_status_cmd { - __le16 gro_en; - u8 rsv[22]; + u8 gro_en; + u8 rsv[23]; }; #define HCLGE_TSO_MSS_MIN 256 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 7c9f2ba1f272..96bfad52630d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1387,7 +1387,8 @@ static int hclge_configure(struct hclge_dev *hdev) ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); if (ret) { - dev_err(&hdev->pdev->dev, "Get wrong speed ret=%d.\n", ret); + dev_err(&hdev->pdev->dev, "failed to parse speed %u, ret = %d\n", + cfg.default_speed, ret); return ret; } @@ -1429,26 +1430,17 @@ static int hclge_configure(struct hclge_dev *hdev) return ret; } -static int hclge_config_tso(struct hclge_dev *hdev, unsigned int tso_mss_min, - unsigned int tso_mss_max) +static int hclge_config_tso(struct hclge_dev *hdev, u16 tso_mss_min, + u16 tso_mss_max) { struct hclge_cfg_tso_status_cmd *req; struct hclge_desc desc; - u16 tso_mss; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false); req = (struct hclge_cfg_tso_status_cmd *)desc.data; - - tso_mss = 0; - hnae3_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M, - HCLGE_TSO_MSS_MIN_S, tso_mss_min); - req->tso_mss_min = cpu_to_le16(tso_mss); - - tso_mss = 0; - hnae3_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M, - HCLGE_TSO_MSS_MIN_S, tso_mss_max); - req->tso_mss_max = cpu_to_le16(tso_mss); + req->tso_mss_min = cpu_to_le16(tso_mss_min); + req->tso_mss_max = cpu_to_le16(tso_mss_max); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -1465,7 +1457,7 @@ static int hclge_config_gro(struct hclge_dev *hdev, bool en) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GRO_GENERIC_CONFIG, false); req = (struct hclge_cfg_gro_status_cmd *)desc.data; - req->gro_en = cpu_to_le16(en ? 1 : 0); + req->gro_en = en ? 1 : 0; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -9928,10 +9920,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) int ret; hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL); - if (!hdev) { - ret = -ENOMEM; - goto out; - } + if (!hdev) + return -ENOMEM; hdev->pdev = pdev; hdev->ae_dev = ae_dev; @@ -10110,6 +10100,7 @@ err_pci_uninit: pci_release_regions(pdev); pci_disable_device(pdev); out: + mutex_destroy(&hdev->vport_lock); return ret; } @@ -10733,16 +10724,19 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) { - /*prepare 4 commands to query DFX BD number*/ - hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_DFX_BD_NUM, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_DFX_BD_NUM, true); - desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - hclge_cmd_setup_basic_desc(&desc[2], HCLGE_OPC_DFX_BD_NUM, true); - desc[2].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - hclge_cmd_setup_basic_desc(&desc[3], HCLGE_OPC_DFX_BD_NUM, true); + int i; + + /* initialize command BD except the last one */ + for (i = 0; i < HCLGE_GET_DFX_REG_TYPE_CNT - 1; i++) { + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, + true); + desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + } + + /* initialize the last command BD */ + hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, true); - return hclge_cmd_send(&hdev->hw, desc, 4); + return hclge_cmd_send(&hdev->hw, desc, HCLGE_GET_DFX_REG_TYPE_CNT); } static int hclge_get_dfx_reg_bd_num(struct hclge_dev *hdev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 913c4f677404..46e6e0fef3ba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -771,12 +771,6 @@ struct hclge_dev { u16 num_roce_msi; /* Num of roce vectors for this PF */ int roce_base_vector; - u16 pending_udp_bitmap; - - u16 rx_itr_default; - u16 tx_itr_default; - - u16 adminq_work_limit; /* Num of admin receive queue desc to process */ unsigned long service_timer_period; unsigned long service_timer_previous; struct timer_list reset_timer; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 696c5ae922e3..e89820702540 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -155,7 +155,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev) ret = mdiobus_register(mdio_bus); if (ret) { dev_err(mdio_bus->parent, - "Failed to register MDIO bus ret = %#x\n", ret); + "failed to register MDIO bus, ret = %d\n", ret); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index f38d236ebf4f..fec65239a3c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -11,9 +11,6 @@ #include "hclgevf_main.h" #include "hnae3.h" -#define hclgevf_is_csq(ring) ((ring)->flag & HCLGEVF_TYPE_CSQ) -#define hclgevf_ring_to_dma_dir(ring) (hclgevf_is_csq(ring) ? \ - DMA_TO_DEVICE : DMA_FROM_DEVICE) #define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev) static int hclgevf_ring_space(struct hclgevf_cmq_ring *ring) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index f830eef02e5c..40d6e602ab51 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -161,8 +161,8 @@ struct hclgevf_query_res_cmd { #define HCLGEVF_GRO_EN_B 0 struct hclgevf_cfg_gro_status_cmd { - __le16 gro_en; - u8 rsv[22]; + u8 gro_en; + u8 rsv[23]; }; #define HCLGEVF_RSS_DEFAULT_OUTPORT_B 4 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 59fcb80671c8..1b9578d0bd80 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -46,7 +46,7 @@ static const u32 cmdq_reg_addr_list[] = {HCLGEVF_CMDQ_TX_ADDR_L_REG, HCLGEVF_CMDQ_RX_TAIL_REG, HCLGEVF_CMDQ_RX_HEAD_REG, HCLGEVF_VECTOR0_CMDQ_SRC_REG, - HCLGEVF_CMDQ_INTR_STS_REG, + HCLGEVF_VECTOR0_CMDQ_STATE_REG, HCLGEVF_CMDQ_INTR_EN_REG, HCLGEVF_CMDQ_INTR_GEN_REG}; @@ -669,8 +669,8 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) u16 tc_size[HCLGEVF_MAX_TC_NUM]; struct hclgevf_desc desc; u16 roundup_size; - int status; unsigned int i; + int status; req = (struct hclgevf_rss_tc_mode_cmd *)desc.data; @@ -1143,7 +1143,6 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, send_msg.en_mc = en_mc_pmc ? 1 : 0; ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); - if (ret) dev_err(&hdev->pdev->dev, "Set promisc mode fail, status is %d.\n", ret); @@ -1826,7 +1825,7 @@ static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev) dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n", hclgevf_read_dev(&hdev->hw, HCLGEVF_MISC_VECTOR_REG_BASE)); dev_info(&hdev->pdev->dev, "vector0 interrupt status: 0x%x\n", - hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STAT_REG)); + hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG)); dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n", hclgevf_read_dev(&hdev->hw, HCLGEVF_CMDQ_TX_DEPTH_REG)); dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n", @@ -2250,7 +2249,7 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, /* fetch the events from their corresponding regs */ cmdq_stat_reg = hclgevf_read_dev(&hdev->hw, - HCLGEVF_VECTOR0_CMDQ_STAT_REG); + HCLGEVF_VECTOR0_CMDQ_STATE_REG); if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) { rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); @@ -2403,7 +2402,7 @@ static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en) false); req = (struct hclgevf_cfg_gro_status_cmd *)desc.data; - req->gro_en = cpu_to_le16(en ? 1 : 0); + req->gro_en = en ? 1 : 0; ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index f19583c4bc9b..c1fac8920ae3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -42,8 +42,6 @@ #define HCLGEVF_CMDQ_RX_DEPTH_REG 0x27020 #define HCLGEVF_CMDQ_RX_TAIL_REG 0x27024 #define HCLGEVF_CMDQ_RX_HEAD_REG 0x27028 -#define HCLGEVF_CMDQ_INTR_SRC_REG 0x27100 -#define HCLGEVF_CMDQ_INTR_STS_REG 0x27104 #define HCLGEVF_CMDQ_INTR_EN_REG 0x27108 #define HCLGEVF_CMDQ_INTR_GEN_REG 0x2710C @@ -88,7 +86,7 @@ /* Vector0 interrupt CMDQ event source register(RW) */ #define HCLGEVF_VECTOR0_CMDQ_SRC_REG 0x27100 /* Vector0 interrupt CMDQ event status register(RO) */ -#define HCLGEVF_VECTOR0_CMDQ_STAT_REG 0x27104 +#define HCLGEVF_VECTOR0_CMDQ_STATE_REG 0x27104 /* CMDQ register bits for RX event(=MBX event) */ #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B 1 /* RST register bits for RESET event */ @@ -280,7 +278,7 @@ struct hclgevf_dev { struct semaphore reset_sem; /* protect reset process */ u32 fw_version; - u16 num_tqps; /* num task queue pairs of this PF */ + u16 num_tqps; /* num task queue pairs of this VF */ u16 alloc_rss_size; /* allocated RSS task queue */ u16 rss_size_max; /* HW defined max RSS task queue */ diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 48428d6a00be..623e516a9630 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -3960,7 +3960,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, * @hw: Struct containing variables accessed by shared code * * Reads the first 64 16 bit words of the EEPROM and sums the values read. - * If the the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is + * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is * valid. */ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 37a2314d3e6b..944abd5eae11 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -576,7 +576,6 @@ static inline u32 __er32(struct e1000_hw *hw, unsigned long reg) #define er32(reg) __er32(hw, E1000_##reg) -s32 __ew32_prepare(struct e1000_hw *hw); void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val); #define ew32(reg, val) __ew32(hw, E1000_##reg, (val)) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 32f23a15ff64..444532292588 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -158,14 +158,12 @@ static bool e1000e_check_me(u16 device_id) * has bit 24 set while ME is accessing MAC CSR registers, wait if it is set * and try again a number of times. **/ -s32 __ew32_prepare(struct e1000_hw *hw) +static void __ew32_prepare(struct e1000_hw *hw) { s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT; while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i) udelay(50); - - return i; } void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) @@ -646,11 +644,11 @@ static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, unsigned int i) { struct e1000_adapter *adapter = rx_ring->adapter; struct e1000_hw *hw = &adapter->hw; - s32 ret_val = __ew32_prepare(hw); + __ew32_prepare(hw); writel(i, rx_ring->tail); - if (unlikely(!ret_val && (i != readl(rx_ring->tail)))) { + if (unlikely(i != readl(rx_ring->tail))) { u32 rctl = er32(RCTL); ew32(RCTL, rctl & ~E1000_RCTL_EN); @@ -663,11 +661,11 @@ static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, unsigned int i) { struct e1000_adapter *adapter = tx_ring->adapter; struct e1000_hw *hw = &adapter->hw; - s32 ret_val = __ew32_prepare(hw); + __ew32_prepare(hw); writel(i, tx_ring->tail); - if (unlikely(!ret_val && (i != readl(tx_ring->tail)))) { + if (unlikely(i != readl(tx_ring->tail))) { u32 tctl = er32(TCTL); ew32(TCTL, tctl & ~E1000_TCTL_EN); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 37514a75f928..6a089848c857 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -694,10 +694,8 @@ init_adminq_exit: * i40e_shutdown_adminq - shutdown routine for the Admin Queue * @hw: pointer to the hardware structure **/ -i40e_status i40e_shutdown_adminq(struct i40e_hw *hw) +void i40e_shutdown_adminq(struct i40e_hw *hw) { - i40e_status ret_code = 0; - if (i40e_check_asq_alive(hw)) i40e_aq_queue_shutdown(hw, true); @@ -706,8 +704,6 @@ i40e_status i40e_shutdown_adminq(struct i40e_hw *hw) if (hw->nvm_buff.va) i40e_free_virt_mem(hw, &hw->nvm_buff); - - return ret_code; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ea7395b391e5..5d807c8004f8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -14486,29 +14486,29 @@ static void i40e_print_features(struct i40e_pf *pf) i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id); #ifdef CONFIG_PCI_IOV - i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); + i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif - i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", + i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", pf->hw.func_caps.num_vsis, pf->vsi[pf->lan_vsi]->num_queue_pairs); if (pf->flags & I40E_FLAG_RSS_ENABLED) - i += snprintf(&buf[i], REMAIN(i), " RSS"); + i += scnprintf(&buf[i], REMAIN(i), " RSS"); if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) - i += snprintf(&buf[i], REMAIN(i), " FD_ATR"); + i += scnprintf(&buf[i], REMAIN(i), " FD_ATR"); if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { - i += snprintf(&buf[i], REMAIN(i), " FD_SB"); - i += snprintf(&buf[i], REMAIN(i), " NTUPLE"); + i += scnprintf(&buf[i], REMAIN(i), " FD_SB"); + i += scnprintf(&buf[i], REMAIN(i), " NTUPLE"); } if (pf->flags & I40E_FLAG_DCB_CAPABLE) - i += snprintf(&buf[i], REMAIN(i), " DCB"); - i += snprintf(&buf[i], REMAIN(i), " VxLAN"); - i += snprintf(&buf[i], REMAIN(i), " Geneve"); + i += scnprintf(&buf[i], REMAIN(i), " DCB"); + i += scnprintf(&buf[i], REMAIN(i), " VxLAN"); + i += scnprintf(&buf[i], REMAIN(i), " Geneve"); if (pf->flags & I40E_FLAG_PTP) - i += snprintf(&buf[i], REMAIN(i), " PTP"); + i += scnprintf(&buf[i], REMAIN(i), " PTP"); if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) - i += snprintf(&buf[i], REMAIN(i), " VEB"); + i += scnprintf(&buf[i], REMAIN(i), " VEB"); else - i += snprintf(&buf[i], REMAIN(i), " VEPA"); + i += scnprintf(&buf[i], REMAIN(i), " VEPA"); dev_info(&pf->pdev->dev, "%s\n", buf); kfree(buf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index bbb478f09093..5c1378641b3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -17,7 +17,7 @@ /* adminq functions */ i40e_status i40e_init_adminq(struct i40e_hw *hw); -i40e_status i40e_shutdown_adminq(struct i40e_hw *hw); +void i40e_shutdown_adminq(struct i40e_hw *hw); void i40e_adminq_init_ring_data(struct i40e_hw *hw); i40e_status i40e_clean_arq_element(struct i40e_hw *hw, struct i40e_arq_event_info *e, diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index f3953744c505..7276580cbe64 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -139,8 +139,6 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program * - * This function enables or disables a UMEM to a certain ring. - * * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR} **/ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) @@ -224,7 +222,7 @@ no_buffers: } /** - * i40e_construct_skb_zc - Create skbufff from zero-copy Rx buffer + * i40e_construct_skb_zc - Create skbuff from zero-copy Rx buffer * @rx_ring: Rx ring * @xdp: xdp_buff * diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 586d69491268..f04c338fb6e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -156,13 +156,11 @@ struct ice_aqc_manage_mac_write { #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0) #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1) #define ICE_AQC_MAN_MAC_WR_S 6 -#define ICE_AQC_MAN_MAC_WR_M (3 << ICE_AQC_MAN_MAC_WR_S) +#define ICE_AQC_MAN_MAC_WR_M ICE_M(3, ICE_AQC_MAN_MAC_WR_S) #define ICE_AQC_MAN_MAC_UPDATE_LAA 0 -#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL (BIT(0) << ICE_AQC_MAN_MAC_WR_S) - /* High 16 bits of MAC address in big endian order */ - __be16 sah; - /* Low 32 bits of MAC address in big endian order */ - __be32 sal; +#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL BIT(ICE_AQC_MAN_MAC_WR_S) + /* byte stream in network order */ + u8 mac_addr[ETH_ALEN]; __le32 addr_high; __le32 addr_low; }; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 94d833b4e745..a174911d8994 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -13,7 +13,7 @@ */ static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) { - int offset, i; + unsigned int offset, i; mutex_lock(qs_cfg->qs_mutex); offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size, @@ -39,7 +39,7 @@ static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) */ static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) { - int i, index = 0; + unsigned int i, index = 0; mutex_lock(qs_cfg->qs_mutex); for (i = 0; i < qs_cfg->q_count; i++) { @@ -281,7 +281,9 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) */ int ice_setup_rx_ctx(struct ice_ring *ring) { + struct device *dev = ice_pf_to_dev(ring->vsi->back); int chain_len = ICE_MAX_CHAINED_RX_BUFS; + u16 num_bufs = ICE_DESC_UNUSED(ring); struct ice_vsi *vsi = ring->vsi; u32 rxdid = ICE_RXDID_FLEX_NIC; struct ice_rlan_ctx rlan_ctx; @@ -324,7 +326,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) return err; xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq); - dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", + dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) @@ -408,7 +410,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) /* Absolute queue number out of 2K needs to be passed */ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); if (err) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + dev_err(dev, "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", pf_q, err); return -EIO; } @@ -426,13 +428,23 @@ int ice_setup_rx_ctx(struct ice_ring *ring) ring->tail = hw->hw_addr + QRX_TAIL(pf_q); writel(0, ring->tail); - err = ring->xsk_umem ? - ice_alloc_rx_bufs_zc(ring, ICE_DESC_UNUSED(ring)) : - ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); - if (err) - dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", - ring->xsk_umem ? "UMEM enabled " : "", - ring->q_index, pf_q); + if (ring->xsk_umem) { + if (!xsk_buff_can_alloc(ring->xsk_umem, num_bufs)) { + dev_warn(dev, "UMEM does not provide enough addresses to fill %d buffers on Rx ring %d\n", + num_bufs, ring->q_index); + dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n"); + + return 0; + } + + err = ice_alloc_rx_bufs_zc(ring, num_bufs); + if (err) + dev_info(dev, "Failed to allocate some buffers on UMEM enabled Rx ring %d (pf_q %d)\n", + ring->q_index, pf_q); + return 0; + } + + ice_alloc_rx_bufs(ring, num_bufs); return 0; } @@ -638,6 +650,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, struct ice_aqc_add_txqs_perq *txq; struct ice_pf *pf = vsi->back; u8 buf_len = sizeof(*qg_buf); + struct ice_hw *hw = &pf->hw; enum ice_status status; u16 pf_q; u8 tc; @@ -646,13 +659,13 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, + ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, ice_tlan_ctx_info); /* init queue specific tail reg. It is referred as * transmit comm scheduler queue doorbell. */ - ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); + ring->tail = hw->hw_addr + QTX_COMM_DBELL(pf_q); if (IS_ENABLED(CONFIG_DCB)) tc = ring->dcb_tc; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 0a0b00fffaf7..8c73e161829d 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -387,6 +387,7 @@ ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd) static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw) { struct ice_switch_info *sw; + enum ice_status status; hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*hw->switch_info), GFP_KERNEL); @@ -397,7 +398,12 @@ static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw) INIT_LIST_HEAD(&sw->vsi_list_map_head); - return ice_init_def_sw_recp(hw); + status = ice_init_def_sw_recp(hw); + if (status) { + devm_kfree(ice_hw_to_dev(hw), hw->switch_info); + return status; + } + return 0; } /** @@ -1092,7 +1098,7 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, rlan_ctx->prefena = 1; - ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); + ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index); } @@ -1994,10 +2000,7 @@ ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write); cmd->flags = flags; - - /* Prep values for flags, sah, sal */ - cmd->sah = htons(*((const u16 *)mac_addr)); - cmd->sal = htonl(*((const u32 *)(mac_addr + 2))); + ether_addr_copy(cmd->mac_addr, mac_addr); return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -3196,12 +3199,14 @@ ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) /** * ice_set_ctx - set context bits in packed structure + * @hw: pointer to the hardware structure * @src_ctx: pointer to a generic non-packed context structure * @dest_ctx: pointer to memory for the packed structure * @ce_info: a description of the structure to be transformed */ enum ice_status -ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) { int f; @@ -3210,6 +3215,12 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) * using the correct size so that we are correct regardless * of the endianness of the machine. */ + if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) { + ice_debug(hw, ICE_DBG_QCTX, + "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n", + f, ce_info[f].width, ce_info[f].size_of); + continue; + } switch (ce_info[f].size_of) { case sizeof(u8): ice_write_byte(src_ctx, dest_ctx, &ce_info[f]); diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index bea755a658eb..9b9e50d2398b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -70,7 +70,8 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); extern const struct ice_ctx_ele ice_tlan_ctx_info[]; enum ice_status -ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info); +ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info); extern struct mutex ice_global_cfg_lock_sw; diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 9a865962296d..479a74efc536 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -12,6 +12,7 @@ do { \ (qinfo)->sq.bal = prefix##_ATQBAL; \ (qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M; \ (qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M; \ + (qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M; \ (qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M; \ (qinfo)->rq.head = prefix##_ARQH; \ (qinfo)->rq.tail = prefix##_ARQT; \ @@ -20,6 +21,7 @@ do { \ (qinfo)->rq.bal = prefix##_ARQBAL; \ (qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M; \ (qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M; \ + (qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M; \ (qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M; \ } while (0) @@ -199,7 +201,9 @@ unwind_alloc_rq_bufs: cq->rq.r.rq_bi[i].pa = 0; cq->rq.r.rq_bi[i].size = 0; } + cq->rq.r.rq_bi = NULL; devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head); + cq->rq.dma_head = NULL; return ICE_ERR_NO_MEMORY; } @@ -245,7 +249,9 @@ unwind_alloc_sq_bufs: cq->sq.r.sq_bi[i].pa = 0; cq->sq.r.sq_bi[i].size = 0; } + cq->sq.r.sq_bi = NULL; devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head); + cq->sq.dma_head = NULL; return ICE_ERR_NO_MEMORY; } @@ -304,6 +310,28 @@ ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) return 0; } +#define ICE_FREE_CQ_BUFS(hw, qi, ring) \ +do { \ + int i; \ + /* free descriptors */ \ + if ((qi)->ring.r.ring##_bi) \ + for (i = 0; i < (qi)->num_##ring##_entries; i++) \ + if ((qi)->ring.r.ring##_bi[i].pa) { \ + dmam_free_coherent(ice_hw_to_dev(hw), \ + (qi)->ring.r.ring##_bi[i].size, \ + (qi)->ring.r.ring##_bi[i].va, \ + (qi)->ring.r.ring##_bi[i].pa); \ + (qi)->ring.r.ring##_bi[i].va = NULL;\ + (qi)->ring.r.ring##_bi[i].pa = 0;\ + (qi)->ring.r.ring##_bi[i].size = 0;\ + } \ + /* free the buffer info list */ \ + if ((qi)->ring.cmd_buf) \ + devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ + /* free DMA head */ \ + devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ +} while (0) + /** * ice_init_sq - main initialization routine for Control ATQ * @hw: pointer to the hardware structure @@ -357,6 +385,7 @@ static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) goto init_ctrlq_exit; init_ctrlq_free_rings: + ICE_FREE_CQ_BUFS(hw, cq, sq); ice_free_cq_ring(hw, &cq->sq); init_ctrlq_exit: @@ -416,33 +445,13 @@ static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) goto init_ctrlq_exit; init_ctrlq_free_rings: + ICE_FREE_CQ_BUFS(hw, cq, rq); ice_free_cq_ring(hw, &cq->rq); init_ctrlq_exit: return ret_code; } -#define ICE_FREE_CQ_BUFS(hw, qi, ring) \ -do { \ - int i; \ - /* free descriptors */ \ - for (i = 0; i < (qi)->num_##ring##_entries; i++) \ - if ((qi)->ring.r.ring##_bi[i].pa) { \ - dmam_free_coherent(ice_hw_to_dev(hw), \ - (qi)->ring.r.ring##_bi[i].size,\ - (qi)->ring.r.ring##_bi[i].va,\ - (qi)->ring.r.ring##_bi[i].pa);\ - (qi)->ring.r.ring##_bi[i].va = NULL; \ - (qi)->ring.r.ring##_bi[i].pa = 0; \ - (qi)->ring.r.ring##_bi[i].size = 0; \ - } \ - /* free the buffer info list */ \ - if ((qi)->ring.cmd_buf) \ - devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ - /* free DMA head */ \ - devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ -} while (0) - /** * ice_shutdown_sq - shutdown the Control ATQ * @hw: pointer to the hardware structure @@ -635,6 +644,50 @@ init_ctrlq_free_sq: } /** + * ice_shutdown_ctrlq - shutdown routine for any control queue + * @hw: pointer to the hardware structure + * @q_type: specific Control queue type + * + * NOTE: this function does not destroy the control queue locks. + */ +static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) +{ + struct ice_ctl_q_info *cq; + + switch (q_type) { + case ICE_CTL_Q_ADMIN: + cq = &hw->adminq; + if (ice_check_sq_alive(hw, cq)) + ice_aq_q_shutdown(hw, true); + break; + case ICE_CTL_Q_MAILBOX: + cq = &hw->mailboxq; + break; + default: + return; + } + + ice_shutdown_sq(hw, cq); + ice_shutdown_rq(hw, cq); +} + +/** + * ice_shutdown_all_ctrlq - shutdown routine for all control queues + * @hw: pointer to the hardware structure + * + * NOTE: this function does not destroy the control queue locks. The driver + * may call this at runtime to shutdown and later restart control queues, such + * as in response to a reset event. + */ +void ice_shutdown_all_ctrlq(struct ice_hw *hw) +{ + /* Shutdown FW admin queue */ + ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); + /* Shutdown PF-VF Mailbox */ + ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX); +} + +/** * ice_init_all_ctrlq - main initialization routine for all control queues * @hw: pointer to the hardware structure * @@ -649,17 +702,27 @@ init_ctrlq_free_sq: */ enum ice_status ice_init_all_ctrlq(struct ice_hw *hw) { - enum ice_status ret_code; + enum ice_status status; + u32 retry = 0; /* Init FW admin queue */ - ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN); - if (ret_code) - return ret_code; + do { + status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN); + if (status) + return status; - ret_code = ice_init_check_adminq(hw); - if (ret_code) - return ret_code; + status = ice_init_check_adminq(hw); + if (status != ICE_ERR_AQ_FW_CRITICAL) + break; + + ice_debug(hw, ICE_DBG_AQ_MSG, + "Retry Admin Queue init due to FW critical error\n"); + ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); + msleep(ICE_CTL_Q_ADMIN_INIT_MSEC); + } while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT); + if (status) + return status; /* Init Mailbox queue */ return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX); } @@ -701,50 +764,6 @@ enum ice_status ice_create_all_ctrlq(struct ice_hw *hw) } /** - * ice_shutdown_ctrlq - shutdown routine for any control queue - * @hw: pointer to the hardware structure - * @q_type: specific Control queue type - * - * NOTE: this function does not destroy the control queue locks. - */ -static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) -{ - struct ice_ctl_q_info *cq; - - switch (q_type) { - case ICE_CTL_Q_ADMIN: - cq = &hw->adminq; - if (ice_check_sq_alive(hw, cq)) - ice_aq_q_shutdown(hw, true); - break; - case ICE_CTL_Q_MAILBOX: - cq = &hw->mailboxq; - break; - default: - return; - } - - ice_shutdown_sq(hw, cq); - ice_shutdown_rq(hw, cq); -} - -/** - * ice_shutdown_all_ctrlq - shutdown routine for all control queues - * @hw: pointer to the hardware structure - * - * NOTE: this function does not destroy the control queue locks. The driver - * may call this at runtime to shutdown and later restart control queues, such - * as in response to a reset event. - */ -void ice_shutdown_all_ctrlq(struct ice_hw *hw) -{ - /* Shutdown FW admin queue */ - ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); - /* Shutdown PF-VF Mailbox */ - ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX); -} - -/** * ice_destroy_ctrlq_locks - Destroy locks for a control queue * @cq: pointer to the control queue * @@ -1042,9 +1061,15 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, /* update the error if time out occurred */ if (!cmd_completed) { - ice_debug(hw, ICE_DBG_AQ_MSG, - "Control Send Queue Writeback timeout.\n"); - status = ICE_ERR_AQ_TIMEOUT; + if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask || + rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) { + ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n"); + status = ICE_ERR_AQ_FW_CRITICAL; + } else { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send Queue Writeback timeout.\n"); + status = ICE_ERR_AQ_TIMEOUT; + } } sq_send_command_error: diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index bf0ebe6149e8..faaa08e8171b 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -34,6 +34,8 @@ enum ice_ctl_q { /* Control Queue timeout settings - max delay 250ms */ #define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ #define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ +#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ +#define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ struct ice_ctl_q_ring { void *dma_head; /* Virtual address to DMA head */ @@ -59,6 +61,7 @@ struct ice_ctl_q_ring { u32 bal; u32 len_mask; u32 len_ena_mask; + u32 len_crit_mask; u32 head_mask; }; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 93cf70d06fe5..87f91b750d59 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -7,8 +7,6 @@ #include "ice_dcb_nl.h" #include <net/dcbnl.h> -#define ICE_APP_PROT_ID_ROCE 0x8915 - /** * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info * @netdev: device associated with interface that needs reset diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 2f1c776747a4..1086c9f778b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -39,6 +39,7 @@ #define PF_MBX_ARQH_ARQH_M ICE_M(0x3FF, 0) #define PF_MBX_ARQLEN 0x0022E480 #define PF_MBX_ARQLEN_ARQLEN_M ICE_M(0x3FF, 0) +#define PF_MBX_ARQLEN_ARQCRIT_M BIT(30) #define PF_MBX_ARQLEN_ARQENABLE_M BIT(31) #define PF_MBX_ARQT 0x0022E580 #define PF_MBX_ATQBAH 0x0022E180 @@ -47,6 +48,7 @@ #define PF_MBX_ATQH_ATQH_M ICE_M(0x3FF, 0) #define PF_MBX_ATQLEN 0x0022E200 #define PF_MBX_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0) +#define PF_MBX_ATQLEN_ATQCRIT_M BIT(30) #define PF_MBX_ATQLEN_ATQENABLE_M BIT(31) #define PF_MBX_ATQT 0x0022E300 #define PRTDCB_GENC 0x00083000 diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index bd2cd3435768..14dfbbc1b2cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -581,7 +581,7 @@ struct ice_tlan_ctx { u8 drop_ena; u8 cache_prof_idx; u8 pkt_shaper_prof_idx; - u8 int_q_state; /* width not needed - internal do not write */ + u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; /* macro to make the table lines short */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f81bd4c30bbc..6f3ee8ac11ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1812,6 +1812,12 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) enum ice_status status; int ret = 0; + /* do not allow modifying VLAN stripping when a port VLAN is configured + * on this VSI + */ + if (vsi->info.pvid) + return 0; + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -2779,7 +2785,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_map_rings_to_vectors(vsi); if (ice_is_xdp_ena_vsi(vsi)) { - vsi->num_xdp_txq = vsi->alloc_txq; + vsi->num_xdp_txq = vsi->alloc_rxq; ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); if (ret) goto err_vectors; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1c255b27244c..b64c4e796636 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1899,6 +1899,9 @@ free_qmap: for (i = 0; i < vsi->tc_cfg.numtc; i++) max_txqs[i] = vsi->num_txq; + /* change number of XDP Tx queues to 0 */ + vsi->num_xdp_txq = 0; + return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); } @@ -1932,7 +1935,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } if (!ice_is_xdp_ena_vsi(vsi) && prog) { - vsi->num_xdp_txq = vsi->alloc_txq; + vsi->num_xdp_txq = vsi->alloc_rxq; xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); @@ -4217,6 +4220,33 @@ ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes) } /** + * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters + * @vsi: the VSI to be updated + * @rings: rings to work on + * @count: number of rings + */ +static void +ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings, + u16 count) +{ + struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; + u16 i; + + for (i = 0; i < count; i++) { + struct ice_ring *ring; + u64 pkts, bytes; + + ring = READ_ONCE(rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + vsi_stats->tx_packets += pkts; + vsi_stats->tx_bytes += bytes; + vsi->tx_restart += ring->tx_stats.restart_q; + vsi->tx_busy += ring->tx_stats.tx_busy; + vsi->tx_linearize += ring->tx_stats.tx_linearize; + } +} + +/** * ice_update_vsi_ring_stats - Update VSI stats counters * @vsi: the VSI to be updated */ @@ -4243,15 +4273,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_lock(); /* update Tx rings counters */ - ice_for_each_txq(vsi, i) { - ring = READ_ONCE(vsi->tx_rings[i]); - ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); - vsi_stats->tx_packets += pkts; - vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->tx_stats.restart_q; - vsi->tx_busy += ring->tx_stats.tx_busy; - vsi->tx_linearize += ring->tx_stats.tx_linearize; - } + ice_update_vsi_tx_ring_stats(vsi, vsi->tx_rings, vsi->num_txq); /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { @@ -4263,6 +4285,11 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; } + /* update XDP Tx rings counters */ + if (ice_is_xdp_ena_vsi(vsi)) + ice_update_vsi_tx_ring_stats(vsi, vsi->xdp_rings, + vsi->num_xdp_txq); + rcu_read_unlock(); } @@ -4295,7 +4322,13 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_PF) { cur_ns->rx_crc_errors = pf->stats.crc_errors; cur_ns->rx_errors = pf->stats.crc_errors + - pf->stats.illegal_bytes; + pf->stats.illegal_bytes + + pf->stats.rx_len_errors + + pf->stats.rx_undersize + + pf->hw_csum_rx_error + + pf->stats.rx_jabber + + pf->stats.rx_fragments + + pf->stats.rx_oversize; cur_ns->rx_length_errors = pf->stats.rx_len_errors; /* record drops from the port level */ cur_ns->rx_missed_errors = pf->stats.eth.rx_discards; @@ -5035,7 +5068,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) struct ice_pf *pf = vsi->back; u8 count = 0; - if (new_mtu == netdev->mtu) { + if (new_mtu == (int)netdev->mtu) { netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); return 0; } @@ -5050,11 +5083,11 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } } - if (new_mtu < netdev->min_mtu) { + if (new_mtu < (int)netdev->min_mtu) { netdev_err(netdev, "new MTU invalid. min_mtu is %d\n", netdev->min_mtu); return -EINVAL; - } else if (new_mtu > netdev->max_mtu) { + } else if (new_mtu > (int)netdev->max_mtu) { netdev_err(netdev, "new MTU invalid. max_mtu is %d\n", netdev->min_mtu); return -EINVAL; @@ -5075,7 +5108,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return -EBUSY; } - netdev->mtu = new_mtu; + netdev->mtu = (unsigned int)new_mtu; /* if VSI is up, bring it down and then back up */ if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { @@ -5201,6 +5234,8 @@ const char *ice_stat_str(enum ice_status stat_err) return "ICE_ERR_AQ_NO_WORK"; case ICE_ERR_AQ_EMPTY: return "ICE_ERR_AQ_EMPTY"; + case ICE_ERR_AQ_FW_CRITICAL: + return "ICE_ERR_AQ_FW_CRITICAL"; } return "ICE_ERR_UNKNOWN"; diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index 546a02856d09..4028c6365172 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -37,6 +37,7 @@ enum ice_status { ICE_ERR_AQ_FULL = -102, ICE_ERR_AQ_NO_WORK = -103, ICE_ERR_AQ_EMPTY = -104, + ICE_ERR_AQ_FW_CRITICAL = -105, }; #endif /* _ICE_STATUS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cf21b4fe928a..e70c4619edc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -38,7 +38,8 @@ */ #if (PAGE_SIZE < 8192) #define ICE_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048)) + ((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \ + SKB_WITH_OVERHEAD(ICE_RXBUF_2048)) /** * ice_compute_pad - compute the padding @@ -107,8 +108,8 @@ static inline int ice_skb_pad(void) #define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \ ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR) #define ICE_DESC_UNUSED(R) \ - ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) + (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1) #define ICE_TX_FLAGS_TSO BIT(0) #define ICE_TX_FLAGS_HW_VLAN BIT(1) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 1ba97172d8d0..ab2031b1c635 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -84,17 +84,12 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u8 ptype) { struct ice_rx_ptype_decoded decoded; - u16 rx_error, rx_status; - u16 rx_stat_err1; + u16 rx_status0, rx_status1; bool ipv4, ipv6; - rx_status = le16_to_cpu(rx_desc->wb.status_error0); - rx_error = rx_status & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)); + rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - rx_stat_err1 = le16_to_cpu(rx_desc->wb.status_error1); decoded = ice_decode_rx_desc_ptype(ptype); /* Start with CHECKSUM_NONE and by default csum_level = 0 */ @@ -106,7 +101,7 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, return; /* check if HW has decoded the packet and checksum */ - if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) + if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; if (!(decoded.known && decoded.outer_ip)) @@ -117,22 +112,22 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); - if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) + if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) goto checksum_fail; - else if (ipv6 && (rx_status & - (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) + + if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) goto checksum_fail; /* check for L4 errors and handle packets that were not able to be * checksummed due to arrival speed */ - if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) + if (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) goto checksum_fail; /* check for outer UDP checksum error in tunneled packets */ - if ((rx_stat_err1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) && - (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))) + if ((rx_status1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) && + (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))) goto checksum_fail; /* If there is an outer header present that might contain a checksum diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 9b09a111321c..efd54299a220 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2862,9 +2862,11 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) return -EIO; } - /* only set dflt_lan_addr once */ - if (is_zero_ether_addr(vf->dflt_lan_addr.addr) && - is_unicast_ether_addr(mac_addr)) + /* Set the default LAN address to the latest unicast MAC address added + * by the VF. The default LAN address is reported by the PF via + * ndo_get_vf_config. + */ + if (is_unicast_ether_addr(mac_addr)) ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr); vf->num_mac++; diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 79ee0a747260..3254737c07a3 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -12,7 +12,7 @@ #include "igb.h" static s32 igb_set_default_fc(struct e1000_hw *hw); -static s32 igb_set_fc_watermarks(struct e1000_hw *hw); +static void igb_set_fc_watermarks(struct e1000_hw *hw); /** * igb_get_bus_info_pcie - Get PCIe bus information @@ -687,7 +687,7 @@ s32 igb_setup_link(struct e1000_hw *hw) wr32(E1000_FCTTV, hw->fc.pause_time); - ret_val = igb_set_fc_watermarks(hw); + igb_set_fc_watermarks(hw); out: @@ -723,9 +723,8 @@ void igb_config_collision_dist(struct e1000_hw *hw) * flow control XON frame transmission is enabled, then set XON frame * tansmission as well. **/ -static s32 igb_set_fc_watermarks(struct e1000_hw *hw) +static void igb_set_fc_watermarks(struct e1000_hw *hw) { - s32 ret_val = 0; u32 fcrtl = 0, fcrth = 0; /* Set the flow control receive threshold registers. Normally, @@ -747,8 +746,6 @@ static s32 igb_set_fc_watermarks(struct e1000_hw *hw) } wr32(E1000_FCRTL, fcrtl); wr32(E1000_FCRTH, fcrth); - - return ret_val; } /** diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 14f9edaaaf83..5dbc5a156626 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -457,7 +457,10 @@ struct igc_nfc_rule { u16 action; }; -#define IGC_MAX_RXNFC_RULES 16 +/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority + * based, and 8 ethertype based. + */ +#define IGC_MAX_RXNFC_RULES 32 /* igc_desc_unused - calculate if we have unused descriptors */ static inline u16 igc_desc_unused(const struct igc_ring *ring) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 3d8d40d6fa3f..186deb1d9375 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -269,13 +269,9 @@ #define IGC_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ #define IGC_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ #define IGC_TXD_STAT_DD 0x00000001 /* Descriptor Done */ -#define IGC_TXD_STAT_EC 0x00000002 /* Excess Collisions */ -#define IGC_TXD_STAT_LC 0x00000004 /* Late Collisions */ -#define IGC_TXD_STAT_TU 0x00000008 /* Transmit underrun */ #define IGC_TXD_CMD_TCP 0x01000000 /* TCP packet */ #define IGC_TXD_CMD_IP 0x02000000 /* IP packet */ #define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ -#define IGC_TXD_STAT_TC 0x00000004 /* Tx Underrun */ #define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ /* IPSec Encrypt Enable */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 946e775e34ae..a938ec8db681 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1222,8 +1222,8 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, * @adapter: Pointer to adapter * @rule: Rule under evaluation * - * Rules with both destination and source MAC addresses are considered invalid - * since the driver doesn't support them. + * The driver doesn't support rules with multiple matches so if more than + * one bit in filter flags is set, @rule is considered invalid. * * Also, if there is already another rule with the same filter in a different * location, @rule is considered invalid. @@ -1244,9 +1244,8 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter, return -EINVAL; } - if (flags & IGC_FILTER_FLAG_DST_MAC_ADDR && - flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { - netdev_dbg(dev, "Filters with both dst and src are not supported\n"); + if (flags & (flags - 1)) { + netdev_dbg(dev, "Rule with multiple matches not supported\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 89445ab02a98..410aeb01de5c 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -235,15 +235,14 @@ out: void igc_clear_hw_cntrs_base(struct igc_hw *hw) { rd32(IGC_CRCERRS); - rd32(IGC_SYMERRS); rd32(IGC_MPC); rd32(IGC_SCC); rd32(IGC_ECOL); rd32(IGC_MCC); rd32(IGC_LATECOL); rd32(IGC_COLC); + rd32(IGC_RERC); rd32(IGC_DC); - rd32(IGC_SEC); rd32(IGC_RLEC); rd32(IGC_XONRXC); rd32(IGC_XONTXC); @@ -288,7 +287,7 @@ void igc_clear_hw_cntrs_base(struct igc_hw *hw) rd32(IGC_ALGNERRC); rd32(IGC_RXERRC); rd32(IGC_TNCRS); - rd32(IGC_CEXTERR); + rd32(IGC_HTDPMC); rd32(IGC_TSCTC); rd32(IGC_TSCTFC); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 97d26991c87e..43fcabb5c023 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3701,8 +3701,6 @@ void igc_update_stats(struct igc_adapter *adapter) adapter->stats.prc511 += rd32(IGC_PRC511); adapter->stats.prc1023 += rd32(IGC_PRC1023); adapter->stats.prc1522 += rd32(IGC_PRC1522); - adapter->stats.symerrs += rd32(IGC_SYMERRS); - adapter->stats.sec += rd32(IGC_SEC); mpc = rd32(IGC_MPC); adapter->stats.mpc += mpc; @@ -3741,6 +3739,7 @@ void igc_update_stats(struct igc_adapter *adapter) adapter->stats.tpt += rd32(IGC_TPT); adapter->stats.colc += rd32(IGC_COLC); + adapter->stats.colc += rd32(IGC_RERC); adapter->stats.algnerrc += rd32(IGC_ALGNERRC); diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 7f999cfc9b39..232e82dec62e 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -127,7 +127,6 @@ /* Statistics Register Descriptions */ #define IGC_CRCERRS 0x04000 /* CRC Error Count - R/clr */ #define IGC_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ -#define IGC_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ #define IGC_RXERRC 0x0400C /* Receive Error Count - R/clr */ #define IGC_MPC 0x04010 /* Missed Packet Count - R/clr */ #define IGC_SCC 0x04014 /* Single Collision Count - R/clr */ @@ -135,10 +134,10 @@ #define IGC_MCC 0x0401C /* Multiple Collision Count - R/clr */ #define IGC_LATECOL 0x04020 /* Late Collision Count - R/clr */ #define IGC_COLC 0x04028 /* Collision Count - R/clr */ +#define IGC_RERC 0x0402C /* Receive Error Count - R/clr */ #define IGC_DC 0x04030 /* Defer Count - R/clr */ #define IGC_TNCRS 0x04034 /* Tx-No CRS - R/clr */ -#define IGC_SEC 0x04038 /* Sequence Error Count - R/clr */ -#define IGC_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define IGC_HTDPMC 0x0403C /* Host Transmit Discarded by MAC - R/clr */ #define IGC_RLEC 0x04040 /* Receive Length Error Count - R/clr */ #define IGC_XONRXC 0x04048 /* XON Rx Count - R/clr */ #define IGC_XONTXC 0x0404C /* XON Tx Count - R/clr */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 0bd1294ba517..17357a12cbdc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -64,8 +64,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) hw->mac.ops.check_link(hw, &speed, &link_up, false); /* if link is down, assume supported */ if (link_up) - supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? - true : false; + supported = speed == IXGBE_LINK_SPEED_1GB_FULL; else supported = true; } @@ -2243,7 +2242,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) } /* Configure pause time (2 TCs per register) */ - reg = hw->fc.pause_time * 0x00010001; + reg = hw->fc.pause_time * 0x00010001U; for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++) IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 45fc7ce1a543..a59c166f794f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2973,35 +2973,6 @@ static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, /* skip the flush */ } -static inline void ixgbe_irq_disable_queues(struct ixgbe_adapter *adapter, - u64 qmask) -{ - u32 mask; - struct ixgbe_hw *hw = &adapter->hw; - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - mask = (IXGBE_EIMS_RTX_QUEUE & qmask); - IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - mask = (qmask & 0xFFFFFFFF); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); - mask = (qmask >> 32); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); - break; - default: - break; - } - /* skip the flush */ -} - /** * ixgbe_irq_enable - Enable default interrupt generation settings * @adapter: board private structure diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 537dfff585e0..d05a5690e66b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -102,7 +102,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, * indirection table and RSS hash key with PF therefore * we want to disable the querying by default. */ - adapter->vfinfo[i].rss_query_enabled = 0; + adapter->vfinfo[i].rss_query_enabled = false; /* Untrust all VFs */ adapter->vfinfo[i].trusted = false; diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 8596ca0e60eb..7df35872c107 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -746,15 +746,12 @@ mtk_star_ring_free_skbs(struct mtk_star_priv *priv, struct mtk_star_ring *ring, struct mtk_star_ring_desc_data *)) { struct mtk_star_ring_desc_data desc_data; - struct mtk_star_ring_desc *desc; int i; for (i = 0; i < MTK_STAR_RING_NUM_DESCS; i++) { if (!ring->dma_addrs[i]) continue; - desc = &ring->descs[i]; - desc_data.dma_addr = ring->dma_addrs[i]; desc_data.skb = ring->skbs[i]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 4256d59eca2b..b6ffd1622cfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -84,7 +84,7 @@ config MLX5_CLS_ACT default y help mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT), - works in both native NIC mdoe and Switchdev SRIOV mode. + works in both native NIC mode and Switchdev SRIOV mode. Actions get attached to a Hardware offloaded classifiers and are invoked after a successful classification. Actions are used to overwrite the classification result, instantly drop or redirect and/or diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index e5ee9103fefb..b61e47bc16e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -34,7 +34,8 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ + en_rep.o en/rep/bond.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ en/mapping.o esw/chains.o en/tc_tun.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ @@ -46,6 +47,10 @@ mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ ecpf.o rdma.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ + esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ + esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o + mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c index cab708af3422..cbf3d76c05a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -56,8 +56,8 @@ void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx); } -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, - u64 rcd_sn) +int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, + u32 seq, __be64 rcd_sn) { return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index e09bc3858d57..aefea467f7b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -109,8 +109,8 @@ int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, bool direction_sx); void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, bool direction_sx); -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, - u64 rcd_sn); +int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, + u32 seq, __be64 rcd_sn); bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev); u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev); int mlx5_accel_tls_init(struct mlx5_core_dev *mdev); @@ -125,8 +125,8 @@ mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, bool direction_sx) { return -ENOTSUPP; } static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, bool direction_sx) { } -static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, - u32 seq, u64 rcd_sn) { return 0; } +static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, + u32 seq, __be64 rcd_sn) { return 0; } static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return mlx5_accel_is_ktls_device(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 7a77fe40af3a..1d91a0d0ab1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1072,7 +1072,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, ds = ent->ts2 - ent->ts1; op = MLX5_GET(mbox_in, in->first.data, opcode); - if (op < ARRAY_SIZE(cmd->stats)) { + if (op < MLX5_CMD_OP_MAX) { stats = &cmd->stats[op]; spin_lock_irq(&stats->lock); stats->sum += ds; @@ -1551,7 +1551,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force if (ent->callback) { ds = ent->ts2 - ent->ts1; - if (ent->op < ARRAY_SIZE(cmd->stats)) { + if (ent->op < MLX5_CMD_OP_MAX) { stats = &cmd->stats[ent->op]; spin_lock_irqsave(&stats->lock, flags); stats->sum += ds; @@ -1936,6 +1936,11 @@ static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) cmd->alloc_dma); } +static u16 cmdif_rev(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; +} + int mlx5_cmd_init(struct mlx5_core_dev *dev) { int size = sizeof(struct mlx5_cmd_prot_block); @@ -1955,10 +1960,16 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0); - if (!cmd->pool) + cmd->stats = kvzalloc(MLX5_CMD_OP_MAX * sizeof(*cmd->stats), GFP_KERNEL); + if (!cmd->stats) return -ENOMEM; + cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0); + if (!cmd->pool) { + err = -ENOMEM; + goto dma_pool_err; + } + err = alloc_cmd_page(dev, cmd); if (err) goto err_free_pool; @@ -1994,7 +2005,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) spin_lock_init(&cmd->alloc_lock); spin_lock_init(&cmd->token_lock); - for (i = 0; i < ARRAY_SIZE(cmd->stats); i++) + for (i = 0; i < MLX5_CMD_OP_MAX; i++) spin_lock_init(&cmd->stats[i].lock); sema_init(&cmd->sem, cmd->max_reg_cmds); @@ -2041,7 +2052,8 @@ err_free_page: err_free_pool: dma_pool_destroy(cmd->pool); - +dma_pool_err: + kvfree(cmd->stats); return err; } EXPORT_SYMBOL(mlx5_cmd_init); @@ -2055,6 +2067,7 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); + kvfree(cmd->stats); } EXPORT_SYMBOL(mlx5_cmd_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 6409090b3ec5..07c8d9811bc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -171,7 +171,7 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) cmd = &dev->priv.cmdif_debugfs; *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); - for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) { + for (i = 0; i < MLX5_CMD_OP_MAX; i++) { stats = &dev->cmd.stats[i]; namep = mlx5_command_str(i); if (strcmp(namep, "unknown command opcode")) { @@ -202,18 +202,23 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, int index, int *is_str) { - u32 out[MLX5_ST_SZ_BYTES(query_qp_out)] = {}; + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {}; u64 param = 0; + u32 *out; int state; u32 *qpc; int err; + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return 0; + MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); MLX5_SET(query_qp_in, in, qpn, qp->qpn); err = mlx5_cmd_exec_inout(dev, query_qp, in, out); if (err) - return 0; + goto out; *is_str = 0; @@ -269,7 +274,8 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, param = MLX5_GET(qpc, qpc, remote_qpn); break; } - +out: + kfree(out); return param; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 8ecac81a385d..a700f3c86899 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -76,58 +76,59 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p, .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; MASK_VAL_L2(u16, ethertype, ethertype); + MASK_VAL_L2(u8, ip_version, ip_version); PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); PRINT_MASKED_VAL(ethertype, p, "%04x"); - if (ethertype.m == 0xffff) { - if (ethertype.v == ETH_P_IP) { + if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) || + (ip_version.m == 0xf && ip_version.v == 4)) { #define MASK_VAL_L2_BE(type, name, fld) \ MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) - MASK_VAL_L2_BE(u32, src_ipv4, - src_ipv4_src_ipv6.ipv4_layout.ipv4); - MASK_VAL_L2_BE(u32, dst_ipv4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, - "%pI4"); - PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, - "%pI4"); - } else if (ethertype.v == ETH_P_IPV6) { - static const struct in6_addr full_ones = { - .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), - __constant_htonl(0xffffffff), - __constant_htonl(0xffffffff), - __constant_htonl(0xffffffff)}, - }; - DECLARE_MASK_VAL(struct in6_addr, src_ipv6); - DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) || + (ip_version.m == 0xf && ip_version.v == 6)) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); - memcpy(src_ipv6.m.in6_u.u6_addr8, - MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - sizeof(src_ipv6.m)); - memcpy(dst_ipv6.m.in6_u.u6_addr8, - MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - sizeof(dst_ipv6.m)); - memcpy(src_ipv6.v.in6_u.u6_addr8, - MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - sizeof(src_ipv6.v)); - memcpy(dst_ipv6.v.in6_u.u6_addr8, - MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - sizeof(dst_ipv6.v)); + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); - if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) - trace_seq_printf(p, "src_ipv6=%pI6 ", - &src_ipv6.v); - if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) - trace_seq_printf(p, "dst_ipv6=%pI6 ", - &dst_ipv6.v); - } + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); } #define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c new file mode 100644 index 000000000000..bdb71332cbf2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include <linux/netdevice.h> +#include <linux/list.h> +#include <net/lag.h> + +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "en_rep.h" + +struct mlx5e_rep_bond { + struct notifier_block nb; + struct netdev_net_notifier nn; + struct list_head metadata_list; +}; + +struct mlx5e_rep_bond_slave_entry { + struct list_head list; + struct net_device *netdev; +}; + +struct mlx5e_rep_bond_metadata { + struct list_head list; /* link to global list of rep_bond_metadata */ + struct mlx5_eswitch *esw; + /* private of uplink holding rep bond metadata list */ + struct net_device *lag_dev; + u32 metadata_reg_c_0; + + struct list_head slaves_list; /* slaves list */ + int slaves; +}; + +static struct mlx5e_rep_bond_metadata * +mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_metadata *found = NULL; + struct mlx5e_rep_bond_metadata *cur; + + list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) { + if (cur->lag_dev == lag_dev) { + found = cur; + break; + } + } + + return found; +} + +static struct mlx5e_rep_bond_slave_entry * +mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata, + const struct net_device *netdev) +{ + struct mlx5e_rep_bond_slave_entry *found = NULL; + struct mlx5e_rep_bond_slave_entry *cur; + + list_for_each_entry(cur, &mdata->slaves_list, list) { + if (cur->netdev == netdev) { + found = cur; + break; + } + } + + return found; +} + +static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata) +{ + netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + list_del(&mdata->list); + mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0); + WARN_ON(!list_empty(&mdata->slaves_list)); + kfree(mdata); +} + +/* This must be called under rtnl_lock */ +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + int err; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) { + /* First netdev becomes slave, no metadata presents the lag_dev. Create one */ + mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); + if (!mdata) + return -ENOMEM; + + mdata->lag_dev = lag_dev; + mdata->esw = esw; + INIT_LIST_HEAD(&mdata->slaves_list); + mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw); + if (!mdata->metadata_reg_c_0) { + kfree(mdata); + return -ENOSPC; + } + list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list); + + netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + } + + s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL); + if (!s_entry) { + err = -ENOMEM; + goto entry_alloc_err; + } + + s_entry->netdev = netdev; + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, + mdata->metadata_reg_c_0); + if (err) + goto ingress_err; + + mdata->slaves++; + list_add_tail(&s_entry->list, &mdata->slaves_list); + netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + + return 0; + +ingress_err: + kfree(s_entry); +entry_alloc_err: + if (!mdata->slaves) + mlx5e_rep_bond_metadata_release(mdata); + return err; +} + +/* This must be called under rtnl_lock */ +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) + return; + + s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev); + if (!s_entry) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + /* Reset bond_metadata to zero first then reset all ingress/egress + * acls and rx rules of unslave representor's vport + */ + mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0); + mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport); + mlx5e_rep_bond_update(priv, false); + + list_del(&s_entry->list); + + netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + + if (--mdata->slaves == 0) + mlx5e_rep_bond_metadata_release(mdata); + kfree(s_entry); +} + +static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + /* A given netdev is not a representor or not a slave of LAG configuration */ + if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev)) + return false; + + /* Egress acl forward to vport is supported only non-uplink representor */ + return rpriv->rep->vport != MLX5_VPORT_UPLINK; +} + +static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct netdev_lag_lower_state_info *lag_info; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + struct list_head *iter; + struct net_device *dev; + u16 acl_vport_num; + u16 fwd_vport_num; + int err; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return; + + info = ptr; + lag_info = info->lower_state_info; + /* This is not an event of a representor becoming active slave */ + if (!lag_info->tx_enabled) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + fwd_vport_num = rpriv->rep->vport; + lag_dev = netdev_master_upper_dev_get(netdev); + + netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", + lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); + + /* Point everyone's egress acl to the vport of the active representor */ + netdev_for_each_lower_dev(lag_dev, dev, iter) { + priv = netdev_priv(dev); + rpriv = priv->ppriv; + acl_vport_num = rpriv->rep->vport; + if (acl_vport_num != fwd_vport_num) { + /* Only single rx_rule for unique bond_metadata should be + * present, delete it if it's saved as passive vport's + * rx_rule with destination as passive vport's root_ft + */ + mlx5e_rep_bond_update(priv, true); + err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, + fwd_vport_num, + acl_vport_num); + if (err) + netdev_warn(dev, + "configure slave vport(%d) egress fwd, err(%d)", + acl_vport_num, err); + } + } + + /* Insert new rx_rule for unique bond_metadata, save it as active vport's + * rx_rule with new destination as active vport's root_ft + */ + err = mlx5e_rep_bond_update(netdev_priv(netdev), false); + if (err) + netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)", + fwd_vport_num, err); +} + +static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + lag_dev = info->upper_dev; + + netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n", + info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name); + + if (info->linking) + mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev); + else + mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev); +} + +/* Bond device of representors and netdev events are used here in specific way + * to support eswitch vports bonding and to perform failover of eswitch vport + * by modifying the vport's egress acl of lower dev representors. Thus this + * also change the traditional behavior of lower dev under bond device. + * All non-representor netdevs or representors of other vendors as lower dev + * of bond device are not supported. + */ +static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + mlx5e_rep_changelowerstate_event(netdev, ptr); + break; + case NETDEV_CHANGEUPPER: + mlx5e_rep_changeupper_event(netdev, ptr); + break; + } + return NOTIFY_DONE; +} + +/* If HW support eswitch vports bonding, register a specific notifier to + * handle it when two or more representors are bonded + */ +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv; + int ret = 0; + + priv = netdev_priv(netdev); + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch)) + goto out; + + uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL); + if (!uplink_priv->bond) { + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&uplink_priv->bond->metadata_list); + uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent; + ret = register_netdevice_notifier_dev_net(netdev, + &uplink_priv->bond->nb, + &uplink_priv->bond->nn); + if (ret) { + netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret); + kvfree(uplink_priv->bond); + uplink_priv->bond = NULL; + } + +out: + return ret; +} + +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) || + !rpriv->uplink_priv.bond) + return; + + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &rpriv->uplink_priv.bond->nb, + &rpriv->uplink_priv.bond->nn); + kvfree(rpriv->uplink_priv.bond); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 995b2ef1fb3b..afc19dca1f5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -119,7 +119,7 @@ mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) } static int -mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec, +mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_rule *rule) { void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -134,10 +134,8 @@ mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec, flow_rule_match_basic(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(match.mask->n_proto)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(match.key->n_proto)); + mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c, + headers_v); MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, @@ -533,7 +531,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->counter = entry->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; - mlx5_tc_ct_set_tuple_match(spec, flow_rule); + mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->zone & MLX5_CT_ZONE_MASK, MLX5_CT_ZONE_MASK); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index e99382f58807..7cce85faa16f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -512,6 +512,13 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_basic key_basic = {}; + struct flow_dissector_key_basic mask_basic = { + .n_proto = htons(0xFFFF), + }; + struct flow_match_basic match_basic = { + .key = &key_basic, .mask = &mask_basic, + }; struct flow_match_control match; u16 addr_type; @@ -537,10 +544,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, ntohl(match.key->dst)); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IP); + key_basic.n_proto = htons(ETH_P_IP); + mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true, + headers_c, headers_v); } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { struct flow_match_ipv6_addrs match; @@ -563,10 +569,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IPV6); + key_basic.n_proto = htons(ETH_P_IPV6); + mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true, + headers_c, headers_v); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index b4a3c96d34fd..1f9526244222 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -25,35 +25,21 @@ static int init_encap_attr(struct net_device *tunnel_dev, return 0; } -static inline __be32 mpls_label_id_field(__be32 label, u8 tos, u8 ttl) -{ - u32 res; - - /* mpls label is 32 bits long and construction as follows: - * 20 bits label - * 3 bits tos - * 1 bit bottom of stack. Since we support only one label, this bit is - * always set. - * 8 bits TTL - */ - res = be32_to_cpu(label) << 12 | 1 << 8 | (tos & 7) << 9 | ttl; - return cpu_to_be32(res); -} - static int generate_ip_tun_hdr(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *r) { const struct ip_tunnel_key *tun_key = &r->tun_info->key; - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); struct udphdr *udp = (struct udphdr *)(buf); struct mpls_shim_hdr *mpls; + u32 tun_id; + tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id)); mpls = (struct mpls_shim_hdr *)(udp + 1); *ip_proto = IPPROTO_UDP; udp->dest = tun_key->tp_dst; - mpls->label_stack_entry = mpls_label_id_field(tun_id, tun_key->tos, tun_key->ttl); + *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index c27e9a609d51..1fbb5a90cb38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -167,7 +167,7 @@ static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, struct tls_context *tls_ctx = tls_get_ctx(sk); struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_tls_offload_context_rx *rx_ctx; - u64 rcd_sn = *(u64 *)rcd_sn_data; + __be64 rcd_sn = *(__be64 *)rcd_sn_data; if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index ec7b332d74c2..bc102d094bbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -985,7 +985,7 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev, return err; } -const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { +static const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { .ieee_getets = mlx5e_dcbnl_ieee_getets, .ieee_setets = mlx5e_dcbnl_ieee_setets, .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4e13e37a9ecd..af89a4803c7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -854,6 +854,24 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) return 0; } +static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!rpriv->vport_rx_rule) + return; + + mlx5_del_flow_rules(rpriv->vport_rx_rule); + rpriv->vport_rx_rule = NULL; +} + +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) +{ + rep_vport_rx_rule_destroy(priv); + + return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv); +} + static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -918,9 +936,7 @@ err_close_drop_rq: static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - mlx5_del_flow_rules(rpriv->vport_rx_rule); + rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); @@ -959,16 +975,18 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + mlx5e_rep_bond_init(rpriv); err = mlx5e_rep_tc_netdevice_event_register(rpriv); if (err) { mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n", err); - goto tc_rep_cleanup; + goto err_event_reg; } return 0; -tc_rep_cleanup: +err_event_reg: + mlx5e_rep_bond_cleanup(rpriv); mlx5e_rep_tc_cleanup(rpriv); return err; } @@ -1001,7 +1019,7 @@ static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) { mlx5e_rep_tc_netdevice_event_unregister(rpriv); mlx5e_rep_indr_clean_block_privs(rpriv); - + mlx5e_rep_bond_cleanup(rpriv); mlx5e_rep_tc_cleanup(rpriv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 1c4af8522467..da9f1686d525 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -56,6 +56,7 @@ struct mlx5e_neigh_update_table { }; struct mlx5_tc_ct_priv; +struct mlx5e_rep_bond; struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by * the uplink's VFs @@ -89,6 +90,9 @@ struct mlx5_rep_uplink_priv { struct mapping_ctx *tunnel_enc_opts_mapping; struct mlx5_tc_ct_priv *ct_priv; + + /* support eswitch vports bonding */ + struct mlx5e_rep_bond *bond; }; struct mlx5e_rep_priv { @@ -211,6 +215,15 @@ struct mlx5e_rep_sq { void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv); +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev); +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev); +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup); + bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 6b3c82da199c..dbb1c6323967 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1056,8 +1056,8 @@ static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, u32 len, struct xdp_buff *xdp) { xdp->data_hard_start = va; - xdp_set_data_meta_invalid(xdp); xdp->data = va + headroom; + xdp_set_data_meta_invalid(xdp); xdp->data_end = xdp->data + len; xdp->rxq = &rq->xdp_rxq; xdp->frame_sz = rq->buff.frame0_sz; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 571da14809fe..3ce177c24d52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -50,6 +50,7 @@ #include <net/arp.h> #include <net/ipv6_stubs.h> #include <net/bareudp.h> +#include <net/bonding.h> #include "en.h" #include "en_rep.h" #include "en/rep/tc.h" @@ -145,6 +146,7 @@ struct mlx5e_tc_flow { struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + struct net_device *orig_dev; /* netdev adding flow first */ int tmp_efi_index; struct list_head tmp_list; /* temporary flow list used by neigh update */ refcount_t refcnt; @@ -208,8 +210,8 @@ mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, fmask = headers_c + soffset; fval = headers_v + soffset; - mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); - data = cpu_to_be32(data) >> (32 - (match_len * 8)); + mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8)); + data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8)); memcpy(fmask, &mask, match_len); memcpy(fval, &data, match_len); @@ -1871,7 +1873,7 @@ enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, memchr_inv(opt->opt_data, 0, opt->length * 4)) { *dont_care = false; - if (opt->opt_class != U16_MAX || + if (opt->opt_class != htons(U16_MAX) || opt->type != U8_MAX) { NL_SET_ERR_MSG(extack, "Partial match of tunnel options in chain > 0 isn't supported"); @@ -2018,6 +2020,32 @@ u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) return flow->tunnel_id; } +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v) +{ + bool ip_version_cap; + + ip_version_cap = outer ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + + if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && + (match->key->n_proto == htons(ETH_P_IP) || + match->key->n_proto == htons(ETH_P_IPV6))) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, + match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match->mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match->key->n_proto)); + } +} + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2239,10 +2267,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_match_basic match; flow_rule_match_basic(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(match.mask->n_proto)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(match.key->n_proto)); + mlx5e_tc_set_ethertype(priv->mdev, &match, + match_level == outer_match_level, + headers_c, headers_v); if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; @@ -2788,10 +2815,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, continue; if (f->field_bsize == 32) { - mask_be32 = (__be32)mask; + mask_be32 = (__force __be32)(mask); mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); } else if (f->field_bsize == 16) { - mask_be32 = (__be32)mask; + mask_be32 = (__force __be32)(mask); mask_be16 = *(__be16 *)&mask_be32; mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); } @@ -3118,16 +3145,19 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, { const struct flow_action_entry *act; bool modify_ip_header; + void *headers_c; void *headers_v; u16 ethertype; u8 ip_proto; int i, err; + headers_c = get_match_headers_criteria(actions, spec); headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ - if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && + ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) goto out_ok; modify_ip_header = false; @@ -3637,7 +3667,7 @@ static int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; uintptr_t hash_key; - int err; + int err = 0; parse_attr = attr->parse_attr; if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { @@ -3758,6 +3788,28 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, return 0; } +static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, + struct net_device *out_dev) +{ + struct net_device *fdb_out_dev = out_dev; + struct net_device *uplink_upper; + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + if (uplink_upper && netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) { + fdb_out_dev = uplink_dev; + } else if (netif_is_lag_master(out_dev)) { + fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); + if (fdb_out_dev && + (!mlx5e_eswitch_rep(fdb_out_dev) || + !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) + fdb_out_dev = NULL; + } + rcu_read_unlock(); + return fdb_out_dev; +} + static int add_vlan_push_action(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr, struct net_device **out_dev, @@ -4073,7 +4125,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper; if (is_duplicated_output_device(priv->netdev, out_dev, @@ -4085,14 +4136,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, ifindexes[if_count] = out_dev->ifindex; if_count++; - rcu_read_lock(); - uplink_upper = - netdev_master_upper_dev_get_rcu(uplink_dev); - if (uplink_upper && - netif_is_lag_master(uplink_upper) && - uplink_upper == out_dev) - out_dev = uplink_dev; - rcu_read_unlock(); + out_dev = get_fdb_out_dev(uplink_dev, out_dev); + if (!out_dev) + return -ENODEV; if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@ -4624,11 +4670,21 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return err; } +static bool is_flow_rule_duplicate_allowed(struct net_device *dev, + struct mlx5e_rep_priv *rpriv) +{ + /* Offloaded flow rule is allowed to duplicate on non-uplink representor + * sharing tc block with other slaves of a lag device. + */ + return netif_is_lag_port(dev) && rpriv->rep->vport != MLX5_VPORT_UPLINK; +} + int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_tc_flow *flow; int err = 0; @@ -4636,6 +4692,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); rcu_read_unlock(); if (flow) { + /* Same flow rule offloaded to non-uplink representor sharing tc block, + * just return 0. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) + goto out; + NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); netdev_warn_once(priv->netdev, @@ -4650,6 +4712,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, if (err) goto out; + /* Flow rule offloaded to non-uplink representor sharing tc block, + * set the flow's owner dev. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv)) + flow->orig_dev = dev; + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 037aa73bf9ab..5c330b0cae21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -170,6 +170,10 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); struct mlx5e_tc_flow; u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v); + #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) int mlx5e_tc_nic_init(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c new file mode 100644 index 000000000000..d46f8b225ebe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "lgcy.h" + +static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport) +{ + esw_acl_egress_vlan_destroy(vport); + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) { + mlx5_del_flow_rules(vport->egress.legacy.drop_rule); + vport->egress.legacy.drop_rule = NULL; + } +} + +static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_group *drop_grp; + u32 *flow_group_in; + int err = 0; + + err = esw_acl_egress_vlan_grp_create(esw, vport); + if (err) + return err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto alloc_err; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); + goto drop_grp_err; + } + + vport->egress.legacy.drop_grp = drop_grp; + kvfree(flow_group_in); + return 0; + +drop_grp_err: + kvfree(flow_group_in); +alloc_err: + esw_acl_egress_vlan_grp_destroy(vport); + return err; +} + +static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) { + mlx5_destroy_flow_group(vport->egress.legacy.drop_grp); + vport->egress.legacy.drop_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); +} + +int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_fc *drop_counter = NULL; + struct mlx5_flow_act flow_act = {}; + /* The egress acl table contains 2 rules: + * 1)Allow traffic with vlan_tag=vst_vlan_id + * 2)Drop all other traffic. + */ + int table_size = 2; + int dest_num = 0; + int err = 0; + + if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(drop_counter)) + esw_warn(esw->dev, + "vport[%d] configure egress drop rule counter err(%ld)\n", + vport->vport, PTR_ERR(drop_counter)); + vport->egress.legacy.drop_counter = drop_counter; + } + + esw_acl_egress_lgcy_rules_destroy(vport); + + if (!vport->info.vlan && !vport->info.qos) { + esw_acl_egress_lgcy_cleanup(esw, vport); + return 0; + } + + if (!IS_ERR_OR_NULL(vport->egress.acl)) + return 0; + + vport->egress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + table_size); + if (IS_ERR_OR_NULL(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + goto out; + } + + err = esw_acl_egress_lgcy_groups_create(esw, vport); + if (err) + goto out; + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + /* Allowed vlan rule */ + err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, + MLX5_FLOW_CONTEXT_ACTION_ALLOW); + if (err) + goto out; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* Attach egress drop flow counter */ + if (!IS_ERR_OR_NULL(drop_counter)) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); + dst = &drop_ctr_dst; + dest_num++; + } + vport->egress.legacy.drop_rule = + mlx5_add_flow_rules(vport->egress.acl, NULL, + &flow_act, dst, dest_num); + if (IS_ERR(vport->egress.legacy.drop_rule)) { + err = PTR_ERR(vport->egress.legacy.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); + vport->egress.legacy.drop_rule = NULL; + goto out; + } + + return err; + +out: + esw_acl_egress_lgcy_cleanup(esw, vport); + return err; +} + +void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + goto clean_drop_counter; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_acl_egress_lgcy_rules_destroy(vport); + esw_acl_egress_lgcy_groups_destroy(vport); + esw_acl_egress_table_destroy(vport); + +clean_drop_counter: + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) { + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + vport->egress.legacy.drop_counter = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c new file mode 100644 index 000000000000..07b2acd7e6b3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "ofld.h" + +static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.fwd_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.fwd_rule); + vport->egress.offloads.fwd_rule = NULL; +} + +static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest) +{ + struct mlx5_flow_act flow_act = {}; + int err = 0; + + esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n", + vport->vport, fwd_dest->vport.num); + + /* Delete the old egress forward-to-vport rule if any */ + esw_acl_egress_ofld_fwd2vport_destroy(vport); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + vport->egress.offloads.fwd_rule = + mlx5_add_flow_rules(vport->egress.acl, NULL, + &flow_act, fwd_dest, 1); + if (IS_ERR(vport->egress.offloads.fwd_rule)) { + err = PTR_ERR(vport->egress.offloads.fwd_rule); + esw_warn(esw->dev, + "vport(%d) failed to add fwd2vport acl rule err(%d)\n", + vport->vport, err); + vport->egress.offloads.fwd_rule = NULL; + } + + return err; +} + +static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest) +{ + int err = 0; + int action; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { + /* For prio tag mode, there is only 1 FTEs: + * 1) prio tag packets - pop the prio tag VLAN, allow + * Unmatched traffic is allowed by default + */ + esw_debug(esw->dev, + "vport[%d] configure prio tag egress rules\n", vport->vport); + + action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + /* prio tag vlan rule - pop it so vport receives untagged packets */ + err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action); + if (err) + goto prio_err; + } + + if (fwd_dest) { + err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest); + if (err) + goto fwd_err; + } + + return 0; + +fwd_err: + esw_acl_egress_vlan_destroy(vport); +prio_err: + return err; +} + +static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) +{ + esw_acl_egress_vlan_destroy(vport); + esw_acl_egress_ofld_fwd2vport_destroy(vport); +} + +static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fwd_grp; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { + ret = esw_acl_egress_vlan_grp_create(esw, vport); + if (ret) + return ret; + + flow_index++; + } + + if (!mlx5_esw_acl_egress_fwd2vport_supported(esw)) + goto out; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + ret = -ENOMEM; + goto fwd_grp_err; + } + + /* This group holds 1 FTE to forward all packets to other vport + * when bond vports is supported. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(fwd_grp)) { + ret = PTR_ERR(fwd_grp); + esw_warn(esw->dev, + "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n", + vport->vport, ret); + kvfree(flow_group_in); + goto fwd_grp_err; + } + vport->egress.offloads.fwd_grp = fwd_grp; + kvfree(flow_group_in); + return 0; + +fwd_grp_err: + esw_acl_egress_vlan_grp_destroy(vport); +out: + return ret; +} + +static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp); + vport->egress.offloads.fwd_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); +} + +int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int table_size = 0; + int err; + + if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_acl_egress_ofld_rules_destroy(vport); + + if (mlx5_esw_acl_egress_fwd2vport_supported(esw)) + table_size++; + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) + table_size++; + vport->egress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size); + if (IS_ERR_OR_NULL(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + return err; + } + + err = esw_acl_egress_ofld_groups_create(esw, vport); + if (err) + goto group_err; + + esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport); + + err = esw_acl_egress_ofld_rules_create(esw, vport, NULL); + if (err) + goto rules_err; + + return 0; + +rules_err: + esw_acl_egress_ofld_groups_destroy(vport); +group_err: + esw_acl_egress_table_destroy(vport); + return err; +} + +void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport) +{ + esw_acl_egress_ofld_rules_destroy(vport); + esw_acl_egress_ofld_groups_destroy(vport); + esw_acl_egress_table_destroy(vport); +} + +int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, + u16 passive_vport_num) +{ + struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num); + struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num); + struct mlx5_flow_destination fwd_dest = {}; + + if (IS_ERR(active_vport)) + return PTR_ERR(active_vport); + if (IS_ERR(passive_vport)) + return PTR_ERR(passive_vport); + + /* Cleanup and recreate rules WITHOUT fwd2vport of active vport */ + esw_acl_egress_ofld_rules_destroy(active_vport); + esw_acl_egress_ofld_rules_create(esw, active_vport, NULL); + + /* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */ + esw_acl_egress_ofld_rules_destroy(passive_vport); + fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + fwd_dest.vport.num = active_vport_num; + fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + + return esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest); +} + +int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) + return PTR_ERR(vport); + + esw_acl_egress_ofld_rules_destroy(vport); + return esw_acl_egress_ofld_rules_create(esw, vport, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c new file mode 100644 index 000000000000..22f4c1c28006 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" + +struct mlx5_flow_table * +esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + int acl_supported; + int vport_index; + int err; + + acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ? + MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) : + MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support); + + if (!acl_supported) + return ERR_PTR(-EOPNOTSUPP); + + esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num, + ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress"); + + vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num); + root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n", + vport_num); + return ERR_PTR(-EOPNOTSUPP); + } + + acl = mlx5_create_vport_flow_table(root_ns, 0, size, 0, vport_num); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num, + ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress", err); + } + return acl; +} + +int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest, + u16 vlan_id, u32 flow_action) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + if (vport->egress.allowed_vlan) + return -EEXIST; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = flow_action; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, fwd_dest, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + } + + kvfree(spec); + return err; +} + +void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + mlx5_del_flow_rules(vport->egress.allowed_vlan); + vport->egress.allowed_vlan = NULL; + } +} + +int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp; + void *match_criteria; + u32 *flow_group_in; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(vlan_grp)) { + ret = PTR_ERR(vlan_grp); + esw_warn(esw->dev, + "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n", + vport->vport, ret); + goto out; + } + vport->egress.vlan_grp = vlan_grp; + +out: + kvfree(flow_group_in); + return ret; +} + +void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) { + mlx5_destroy_flow_group(vport->egress.vlan_grp); + vport->egress.vlan_grp = NULL; + } +} + +void esw_acl_egress_table_destroy(struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.acl = NULL; +} + +void esw_acl_ingress_table_destroy(struct mlx5_vport *vport) +{ + if (!vport->ingress.acl) + return; + + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; +} + +void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->ingress.allow_rule) + return; + + mlx5_del_flow_rules(vport->ingress.allow_rule); + vport->ingress.allow_rule = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h new file mode 100644 index 000000000000..8dc4cab66a71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_HELPER_H__ +#define __MLX5_ESWITCH_ACL_HELPER_H__ + +#include "eswitch.h" + +/* General acl helper functions */ +struct mlx5_flow_table * +esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size); + +/* Egress acl helper functions */ +void esw_acl_egress_table_destroy(struct mlx5_vport *vport); +int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest, + u16 vlan_id, u32 flow_action); +void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport); +int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport); + +/* Ingress acl helper functions */ +void esw_acl_ingress_table_destroy(struct mlx5_vport *vport); +void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport); + +#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c new file mode 100644 index 000000000000..9bda4fe2eafa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "lgcy.h" + +static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.legacy.drop_rule) { + mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); + vport->ingress.legacy.drop_rule = NULL; + } + esw_acl_ingress_allow_rule_destroy(vport); +} + +static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n", + vport->vport, err); + goto spoof_err; + } + vport->ingress.legacy.allow_untagged_spoofchk_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, err); + goto untagged_err; + } + vport->ingress.legacy.allow_untagged_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n", + vport->vport, err); + goto allow_spoof_err; + } + vport->ingress.legacy.allow_spoofchk_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, err); + goto drop_err; + } + vport->ingress.legacy.drop_grp = g; + kvfree(flow_group_in); + return 0; + +drop_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } +allow_spoof_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } +untagged_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } +spoof_err: + kvfree(flow_group_in); + return err; +} + +static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.legacy.allow_spoofchk_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_spoofchk_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } + if (vport->ingress.legacy.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp); + vport->ingress.legacy.drop_grp = NULL; + } +} + +int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec = NULL; + struct mlx5_fc *counter = NULL; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + int dest_num = 0; + int err = 0; + u8 *smac_v; + + esw_acl_ingress_lgcy_rules_destroy(vport); + + if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(counter)) + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule counter failed\n", + vport->vport); + vport->ingress.legacy.drop_counter = counter; + } + + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { + esw_acl_ingress_lgcy_cleanup(esw, vport); + return 0; + } + + if (!vport->ingress.acl) { + vport->ingress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + table_size); + if (IS_ERR_OR_NULL(vport->ingress.acl)) { + err = PTR_ERR(vport->ingress.acl); + vport->ingress.acl = NULL; + return err; + } + + err = esw_acl_ingress_lgcy_groups_create(esw, vport); + if (err) + goto out; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + if (vport->info.vlan || vport->info.qos) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + + if (vport->info.spoofchk) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.smac_15_0); + smac_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, vport->info.mac); + } + + /* Create ingress allow rule */ + memset(spec, 0, sizeof(*spec)); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + /* Attach drop flow counter */ + if (counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); + dst = &drop_ctr_dst; + dest_num++; + } + vport->ingress.legacy.drop_rule = + mlx5_add_flow_rules(vport->ingress.acl, NULL, + &flow_act, dst, dest_num); + if (IS_ERR(vport->ingress.legacy.drop_rule)) { + err = PTR_ERR(vport->ingress.legacy.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); + vport->ingress.legacy.drop_rule = NULL; + goto out; + } + kvfree(spec); + return 0; + +out: + esw_acl_ingress_lgcy_cleanup(esw, vport); + kvfree(spec); + return err; +} + +void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + goto clean_drop_counter; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_acl_ingress_lgcy_rules_destroy(vport); + esw_acl_ingress_lgcy_groups_destroy(vport); + esw_acl_ingress_table_destroy(vport); + +clean_drop_counter: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_counter)) { + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c new file mode 100644 index 000000000000..4e55d7225a26 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "ofld.h" + +static bool +esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) +{ + return (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)); +} + +static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + /* For prio tag mode, there is only 1 FTEs: + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow + * Unmatched traffic is allowed by default + */ + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Untagged packets - push prio tag VLAN, allow */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + flow_act.vlan[0].vid = 0; + flow_act.vlan[0].prio = 0; + + if (vport->ingress.offloads.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + } + + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress untagged allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + } + + kvfree(spec); + return err; +} + +static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); + + vport->ingress.offloads.modify_metadata = + mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action); + if (IS_ERR(vport->ingress.offloads.modify_metadata)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata); + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + vport->ingress.offloads.modify_metadata_rule = + mlx5_add_flow_rules(vport->ingress.acl, + NULL, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; + } + return err; +} + +static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.modify_metadata_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; +} + +static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_acl_ingress_mod_metadata_create(esw, vport); + if (err) { + esw_warn(esw->dev, + "vport(%d) create ingress modify metadata, err(%d)\n", + vport->vport, err); + return err; + } + } + + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { + err = esw_acl_ingress_prio_tag_create(esw, vport); + if (err) { + esw_warn(esw->dev, + "vport(%d) create ingress prio tag rule, err(%d)\n", + vport->vport, err); + goto prio_tag_err; + } + } + + return 0; + +prio_tag_err: + esw_acl_ingress_mod_metadata_destroy(esw, vport); + return err; +} + +static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_ingress_allow_rule_destroy(vport); + esw_acl_ingress_mod_metadata_destroy(esw, vport); +} + +static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { + /* This group is to hold FTE to match untagged packets when prio_tag + * is enabled. + */ + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, ret); + goto prio_tag_err; + } + vport->ingress.offloads.metadata_prio_tag_grp = g; + flow_index++; + } + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + /* This group holds an FTE with no match to add metadata for + * tagged packets if prio-tag is enabled, or for all untagged + * traffic in case prio-tag is disabled. + */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto metadata_err; + } + vport->ingress.offloads.metadata_allmatch_grp = g; + } + + kvfree(flow_group_in); + return 0; + +metadata_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } +prio_tag_err: + kvfree(flow_group_in); + return ret; +} + +static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.offloads.metadata_allmatch_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp); + vport->ingress.offloads.metadata_allmatch_grp = NULL; + } + + if (vport->ingress.offloads.metadata_prio_tag_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } +} + +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int num_ftes = 0; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !esw_acl_ingress_prio_tag_enabled(esw, vport)) + return 0; + + esw_acl_ingress_allow_rule_destroy(vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + num_ftes++; + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) + num_ftes++; + + vport->ingress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + num_ftes); + if (IS_ERR_OR_NULL(vport->ingress.acl)) { + err = PTR_ERR(vport->ingress.acl); + vport->ingress.acl = NULL; + return err; + } + + err = esw_acl_ingress_ofld_groups_create(esw, vport); + if (err) + goto group_err; + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + err = esw_acl_ingress_ofld_rules_create(esw, vport); + if (err) + goto rules_err; + + return 0; + +rules_err: + esw_acl_ingress_ofld_groups_destroy(vport); +group_err: + esw_acl_ingress_table_destroy(vport); + return err; +} + +void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_ingress_ofld_rules_destroy(esw, vport); + esw_acl_ingress_ofld_groups_destroy(vport); + esw_acl_ingress_table_destroy(vport); +} + +/* Caller must hold rtnl_lock */ +int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + int err; + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + err = PTR_ERR(vport); + goto out; + } + + esw_acl_ingress_ofld_rules_destroy(esw, vport); + + vport->metadata = metadata ? metadata : vport->default_metadata; + + /* Recreate ingress acl rules with vport->metadata */ + err = esw_acl_ingress_ofld_rules_create(esw, vport); + if (err) + goto out; + + return 0; + +out: + vport->metadata = vport->default_metadata; + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h new file mode 100644 index 000000000000..44c152da3d83 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_LGCY_H__ +#define __MLX5_ESWITCH_ACL_LGCY_H__ + +#include "eswitch.h" + +/* Eswitch acl egress external APIs */ +int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +/* Eswitch acl ingress external APIs */ +int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h new file mode 100644 index 000000000000..c57869b93d60 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_OFLD_H__ +#define __MLX5_ESWITCH_ACL_OFLD_H__ + +#include "eswitch.h" + +/* Eswitch acl egress external APIs */ +int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); +int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, + u16 passive_vport_num); +int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num); + +static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw) +{ + return esw && esw->mode == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport); +} + +/* Eswitch acl ingress external APIs */ +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata); + +#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ac79b7c9aeb3..1116ab9bea6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -35,6 +35,7 @@ #include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> +#include "esw/acl/lgcy.h" #include "mlx5_core.h" #include "lib/eq.h" #include "eswitch.h" @@ -936,512 +937,6 @@ static void esw_vport_change_handler(struct work_struct *work) mutex_unlock(&esw->state_lock); } -int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_group *vlan_grp = NULL; - struct mlx5_flow_group *drop_grp = NULL; - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *acl; - void *match_criteria; - u32 *flow_group_in; - /* The egress acl table contains 2 rules: - * 1)Allow traffic with vlan_tag=vst_vlan_id - * 2)Drop all other traffic. - */ - int table_size = 2; - int err = 0; - - if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - if (!IS_ERR_OR_NULL(vport->egress.acl)) - return 0; - - esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", - vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); - - root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - mlx5_eswitch_vport_num_to_index(esw, vport->vport)); - if (!root_ns) { - esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); - return -EOPNOTSUPP; - } - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) - return -ENOMEM; - - acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR(acl)) { - err = PTR_ERR(acl); - esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", - vport->vport, err); - goto out; - } - - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); - - vlan_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR(vlan_grp)) { - err = PTR_ERR(vlan_grp); - esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", - vport->vport, err); - goto out; - } - - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); - drop_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR(drop_grp)) { - err = PTR_ERR(drop_grp); - esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", - vport->vport, err); - goto out; - } - - vport->egress.acl = acl; - vport->egress.drop_grp = drop_grp; - vport->egress.allowed_vlans_grp = vlan_grp; -out: - kvfree(flow_group_in); - if (err && !IS_ERR_OR_NULL(vlan_grp)) - mlx5_destroy_flow_group(vlan_grp); - if (err && !IS_ERR_OR_NULL(acl)) - mlx5_destroy_flow_table(acl); - return err; -} - -void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { - mlx5_del_flow_rules(vport->egress.allowed_vlan); - vport->egress.allowed_vlan = NULL; - } - - if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) { - mlx5_del_flow_rules(vport->egress.legacy.drop_rule); - vport->egress.legacy.drop_rule = NULL; - } -} - -void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - if (IS_ERR_OR_NULL(vport->egress.acl)) - return; - - esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); - - esw_vport_cleanup_egress_rules(esw, vport); - mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp); - mlx5_destroy_flow_group(vport->egress.drop_grp); - mlx5_destroy_flow_table(vport->egress.acl); - vport->egress.allowed_vlans_grp = NULL; - vport->egress.drop_grp = NULL; - vport->egress.acl = NULL; -} - -static int -esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_group *g; - void *match_criteria; - u32 *flow_group_in; - int err; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) - return -ENOMEM; - - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); - - g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n", - vport->vport, err); - goto spoof_err; - } - vport->ingress.legacy.allow_untagged_spoofchk_grp = g; - - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); - - g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n", - vport->vport, err); - goto untagged_err; - } - vport->ingress.legacy.allow_untagged_only_grp = g; - - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); - - g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n", - vport->vport, err); - goto allow_spoof_err; - } - vport->ingress.legacy.allow_spoofchk_only_grp = g; - - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); - - g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n", - vport->vport, err); - goto drop_err; - } - vport->ingress.legacy.drop_grp = g; - kvfree(flow_group_in); - return 0; - -drop_err: - if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) { - mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); - vport->ingress.legacy.allow_spoofchk_only_grp = NULL; - } -allow_spoof_err: - if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) { - mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); - vport->ingress.legacy.allow_untagged_only_grp = NULL; - } -untagged_err: - if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) { - mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); - vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; - } -spoof_err: - kvfree(flow_group_in); - return err; -} - -int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, int table_size) -{ - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *acl; - int vport_index; - int err; - - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", - vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); - - vport_index = mlx5_eswitch_vport_num_to_index(esw, vport->vport); - root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport_index); - if (!root_ns) { - esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", - vport->vport); - return -EOPNOTSUPP; - } - - acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR(acl)) { - err = PTR_ERR(acl); - esw_warn(dev, "vport[%d] ingress create flow Table, err(%d)\n", - vport->vport, err); - return err; - } - vport->ingress.acl = acl; - return 0; -} - -void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport) -{ - if (!vport->ingress.acl) - return; - - mlx5_destroy_flow_table(vport->ingress.acl); - vport->ingress.acl = NULL; -} - -void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - if (vport->ingress.legacy.drop_rule) { - mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); - vport->ingress.legacy.drop_rule = NULL; - } - - if (vport->ingress.allow_rule) { - mlx5_del_flow_rules(vport->ingress.allow_rule); - vport->ingress.allow_rule = NULL; - } -} - -static void esw_vport_disable_legacy_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - if (!vport->ingress.acl) - return; - - esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); - - esw_vport_cleanup_ingress_rules(esw, vport); - if (vport->ingress.legacy.allow_spoofchk_only_grp) { - mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); - vport->ingress.legacy.allow_spoofchk_only_grp = NULL; - } - if (vport->ingress.legacy.allow_untagged_only_grp) { - mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); - vport->ingress.legacy.allow_untagged_only_grp = NULL; - } - if (vport->ingress.legacy.allow_untagged_spoofchk_grp) { - mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); - vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; - } - if (vport->ingress.legacy.drop_grp) { - mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp); - vport->ingress.legacy.drop_grp = NULL; - } - esw_vport_destroy_ingress_acl_table(vport); -} - -static int esw_vport_ingress_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - struct mlx5_fc *counter = vport->ingress.legacy.drop_counter; - struct mlx5_flow_destination drop_ctr_dst = {0}; - struct mlx5_flow_destination *dst = NULL; - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec = NULL; - int dest_num = 0; - int err = 0; - u8 *smac_v; - - /* The ingress acl table contains 4 groups - * (2 active rules at the same time - - * 1 allow rule from one of the first 3 groups. - * 1 drop rule from the last group): - * 1)Allow untagged traffic with smac=original mac. - * 2)Allow untagged traffic. - * 3)Allow traffic with smac=original mac. - * 4)Drop all other traffic. - */ - int table_size = 4; - - esw_vport_cleanup_ingress_rules(esw, vport); - - if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { - esw_vport_disable_legacy_ingress_acl(esw, vport); - return 0; - } - - if (!vport->ingress.acl) { - err = esw_vport_create_ingress_acl_table(esw, vport, table_size); - if (err) { - esw_warn(esw->dev, - "vport[%d] enable ingress acl err (%d)\n", - err, vport->vport); - return err; - } - - err = esw_vport_create_legacy_ingress_acl_groups(esw, vport); - if (err) - goto out; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->info.vlan, vport->info.qos); - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out; - } - - if (vport->info.vlan || vport->info.qos) - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - - if (vport->info.spoofchk) { - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); - smac_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - outer_headers.smac_47_16); - ether_addr_copy(smac_v, vport->info.mac); - } - - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; - vport->ingress.allow_rule = - mlx5_add_flow_rules(vport->ingress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->ingress.allow_rule)) { - err = PTR_ERR(vport->ingress.allow_rule); - esw_warn(esw->dev, - "vport[%d] configure ingress allow rule, err(%d)\n", - vport->vport, err); - vport->ingress.allow_rule = NULL; - goto out; - } - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* Attach drop flow counter */ - if (counter) { - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter_id = mlx5_fc_id(counter); - dst = &drop_ctr_dst; - dest_num++; - } - vport->ingress.legacy.drop_rule = - mlx5_add_flow_rules(vport->ingress.acl, NULL, - &flow_act, dst, dest_num); - if (IS_ERR(vport->ingress.legacy.drop_rule)) { - err = PTR_ERR(vport->ingress.legacy.drop_rule); - esw_warn(esw->dev, - "vport[%d] configure ingress drop rule, err(%d)\n", - vport->vport, err); - vport->ingress.legacy.drop_rule = NULL; - goto out; - } - kvfree(spec); - return 0; - -out: - esw_vport_disable_legacy_ingress_acl(esw, vport); - kvfree(spec); - return err; -} - -int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - u16 vlan_id, u32 flow_action) -{ - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *spec; - int err = 0; - - if (vport->egress.allowed_vlan) - return -EEXIST; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id); - - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = flow_action; - vport->egress.allowed_vlan = - mlx5_add_flow_rules(vport->egress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->egress.allowed_vlan)) { - err = PTR_ERR(vport->egress.allowed_vlan); - esw_warn(esw->dev, - "vport[%d] configure egress vlan rule failed, err(%d)\n", - vport->vport, err); - vport->egress.allowed_vlan = NULL; - } - - kvfree(spec); - return err; -} - -static int esw_vport_egress_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - struct mlx5_fc *counter = vport->egress.legacy.drop_counter; - struct mlx5_flow_destination drop_ctr_dst = {0}; - struct mlx5_flow_destination *dst = NULL; - struct mlx5_flow_act flow_act = {0}; - int dest_num = 0; - int err = 0; - - esw_vport_cleanup_egress_rules(esw, vport); - - if (!vport->info.vlan && !vport->info.qos) { - esw_vport_disable_egress_acl(esw, vport); - return 0; - } - - err = esw_vport_enable_egress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable egress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - esw_debug(esw->dev, - "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->info.vlan, vport->info.qos); - - /* Allowed vlan rule */ - err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, vport->info.vlan, - MLX5_FLOW_CONTEXT_ACTION_ALLOW); - if (err) - return err; - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* Attach egress drop flow counter */ - if (counter) { - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter_id = mlx5_fc_id(counter); - dst = &drop_ctr_dst; - dest_num++; - } - vport->egress.legacy.drop_rule = - mlx5_add_flow_rules(vport->egress.acl, NULL, - &flow_act, dst, dest_num); - if (IS_ERR(vport->egress.legacy.drop_rule)) { - err = PTR_ERR(vport->egress.legacy.drop_rule); - esw_warn(esw->dev, - "vport[%d] configure egress drop rule failed, err(%d)\n", - vport->vport, err); - vport->egress.legacy.drop_rule = NULL; - } - - return err; -} - static bool element_type_supported(struct mlx5_eswitch *esw, int type) { const struct mlx5_core_dev *dev = esw->dev; @@ -1653,44 +1148,19 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (mlx5_esw_is_manager_vport(esw, vport->vport)) return 0; - if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { - vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); - if (IS_ERR(vport->ingress.legacy.drop_counter)) { - esw_warn(esw->dev, - "vport[%d] configure ingress drop rule counter failed\n", - vport->vport); - vport->ingress.legacy.drop_counter = NULL; - } - } - - ret = esw_vport_ingress_config(esw, vport); + ret = esw_acl_ingress_lgcy_setup(esw, vport); if (ret) goto ingress_err; - if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { - vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); - if (IS_ERR(vport->egress.legacy.drop_counter)) { - esw_warn(esw->dev, - "vport[%d] configure egress drop rule counter failed\n", - vport->vport); - vport->egress.legacy.drop_counter = NULL; - } - } - - ret = esw_vport_egress_config(esw, vport); + ret = esw_acl_egress_lgcy_setup(esw, vport); if (ret) goto egress_err; return 0; egress_err: - esw_vport_disable_legacy_ingress_acl(esw, vport); - mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); - vport->egress.legacy.drop_counter = NULL; - + esw_acl_ingress_lgcy_cleanup(esw, vport); ingress_err: - mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); - vport->ingress.legacy.drop_counter = NULL; return ret; } @@ -1710,13 +1180,8 @@ static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, if (mlx5_esw_is_manager_vport(esw, vport->vport)) return; - esw_vport_disable_egress_acl(esw, vport); - mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); - vport->egress.legacy.drop_counter = NULL; - - esw_vport_disable_legacy_ingress_acl(esw, vport); - mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); - vport->ingress.legacy.drop_counter = NULL; + esw_acl_egress_lgcy_cleanup(esw, vport); + esw_acl_ingress_lgcy_cleanup(esw, vport); } static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, @@ -2265,6 +1730,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) mutex_init(&esw->offloads.decap_tbl_lock); hash_init(esw->offloads.decap_tbl); atomic64_set(&esw->offloads.num_flows, 0); + ida_init(&esw->offloads.vport_metadata_ida); mutex_init(&esw->state_lock); mutex_init(&esw->mode_lock); @@ -2303,6 +1769,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup_reps(esw); mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); + ida_destroy(&esw->offloads.vport_metadata_ida); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); mutex_destroy(&esw->offloads.decap_tbl_lock); @@ -2348,7 +1815,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, ether_addr_copy(evport->info.mac, mac); evport->info.node_guid = node_guid; if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) - err = esw_vport_ingress_config(esw, evport); + err = esw_acl_ingress_lgcy_setup(esw, evport); unlock: mutex_unlock(&esw->state_lock); @@ -2430,10 +1897,10 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, evport->info.vlan = vlan; evport->info.qos = qos; if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { - err = esw_vport_ingress_config(esw, evport); + err = esw_acl_ingress_lgcy_setup(esw, evport); if (err) return err; - err = esw_vport_egress_config(esw, evport); + err = esw_acl_egress_lgcy_setup(esw, evport); } return err; @@ -2475,7 +1942,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, "Spoofchk in set while MAC is invalid, vport(%d)\n", evport->vport); if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) - err = esw_vport_ingress_config(esw, evport); + err = esw_acl_ingress_lgcy_setup(esw, evport); if (err) evport->info.spoofchk = pschk; mutex_unlock(&esw->state_lock); @@ -2734,7 +2201,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, if (!vport->enabled) goto unlock; - if (vport->egress.legacy.drop_counter) + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) mlx5_fc_query(dev, vport->egress.legacy.drop_counter, &stats->rx_dropped, &bytes); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ccbbea3e0505..a5175e98c0b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -99,13 +99,19 @@ struct vport_ingress { struct vport_egress { struct mlx5_flow_table *acl; - struct mlx5_flow_group *allowed_vlans_grp; - struct mlx5_flow_group *drop_grp; struct mlx5_flow_handle *allowed_vlan; - struct { - struct mlx5_flow_handle *drop_rule; - struct mlx5_fc *drop_counter; - } legacy; + struct mlx5_flow_group *vlan_grp; + union { + struct { + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; + struct { + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + } offloads; + }; }; struct mlx5_vport_drop_stats { @@ -143,6 +149,8 @@ struct mlx5_vport { struct vport_ingress ingress; struct vport_egress egress; + u32 default_metadata; + u32 metadata; struct mlx5_vport_info info; @@ -218,6 +226,7 @@ struct mlx5_esw_offload { u8 inline_mode; atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; + struct ida vport_metadata_ida; }; /* E-Switch MC FDB table hash node */ @@ -285,18 +294,10 @@ void esw_offloads_disable(struct mlx5_eswitch *esw); int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); -void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - int table_size); -void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport); -void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); + +u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); +void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); + int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); @@ -458,10 +459,6 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); -int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - u16 vlan_id, u32 flow_action); - static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) { return esw->qos.enabled; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 554fc64d8ef6..060354bb211a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -31,12 +31,14 @@ */ #include <linux/etherdevice.h> +#include <linux/idr.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "esw/acl/ofld.h" #include "esw/chains.h" #include "rdma.h" #include "en.h" @@ -234,13 +236,6 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, return &esw->offloads.vport_reps[idx]; } -static bool -esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw, - const struct mlx5_vport *vport) -{ - return (MLX5_CAP_GEN(esw->dev, prio_tag_required) && - mlx5_eswitch_is_vf_vport(esw, vport->vport)); -} static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, @@ -1851,279 +1846,6 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; - int err = 0; - - /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN and modify metadata if - * required, allow - * Unmatched traffic is allowed by default - */ - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - /* Untagged packets - push prio tag VLAN, allow */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0); - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | - MLX5_FLOW_CONTEXT_ACTION_ALLOW; - flow_act.vlan[0].ethtype = ETH_P_8021Q; - flow_act.vlan[0].vid = 0; - flow_act.vlan[0].prio = 0; - - if (vport->ingress.offloads.modify_metadata_rule) { - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; - } - - vport->ingress.allow_rule = - mlx5_add_flow_rules(vport->ingress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->ingress.allow_rule)) { - err = PTR_ERR(vport->ingress.allow_rule); - esw_warn(esw->dev, - "vport[%d] configure ingress untagged allow rule, err(%d)\n", - vport->vport, err); - vport->ingress.allow_rule = NULL; - } - - kvfree(spec); - return err; -} - -static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; - struct mlx5_flow_act flow_act = {}; - int err = 0; - u32 key; - - key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); - key >>= ESW_SOURCE_PORT_METADATA_OFFSET; - - MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, action, field, - MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - MLX5_SET(set_action_in, action, data, key); - MLX5_SET(set_action_in, action, offset, - ESW_SOURCE_PORT_METADATA_OFFSET); - MLX5_SET(set_action_in, action, length, - ESW_SOURCE_PORT_METADATA_BITS); - - vport->ingress.offloads.modify_metadata = - mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - 1, action); - if (IS_ERR(vport->ingress.offloads.modify_metadata)) { - err = PTR_ERR(vport->ingress.offloads.modify_metadata); - esw_warn(esw->dev, - "failed to alloc modify header for vport %d ingress acl (%d)\n", - vport->vport, err); - return err; - } - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; - flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; - vport->ingress.offloads.modify_metadata_rule = - mlx5_add_flow_rules(vport->ingress.acl, - NULL, &flow_act, NULL, 0); - if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { - err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); - esw_warn(esw->dev, - "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", - vport->vport, err); - mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); - vport->ingress.offloads.modify_metadata_rule = NULL; - } - return err; -} - -static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - if (vport->ingress.offloads.modify_metadata_rule) { - mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); - mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); - - vport->ingress.offloads.modify_metadata_rule = NULL; - } -} - -static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_group *g; - void *match_criteria; - u32 *flow_group_in; - u32 flow_index = 0; - int ret = 0; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) - return -ENOMEM; - - if (esw_check_ingress_prio_tag_enabled(esw, vport)) { - /* This group is to hold FTE to match untagged packets when prio_tag - * is enabled. - */ - memset(flow_group_in, 0, inlen); - - match_criteria = MLX5_ADDR_OF(create_flow_group_in, - flow_group_in, match_criteria); - MLX5_SET(create_flow_group_in, flow_group_in, - match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); - - g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); - if (IS_ERR(g)) { - ret = PTR_ERR(g); - esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n", - vport->vport, ret); - goto prio_tag_err; - } - vport->ingress.offloads.metadata_prio_tag_grp = g; - flow_index++; - } - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - /* This group holds an FTE with no matches for add metadata for - * tagged packets, if prio-tag is enabled (as a fallthrough), - * or all traffic in case prio-tag is disabled. - */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); - - g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); - if (IS_ERR(g)) { - ret = PTR_ERR(g); - esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", - vport->vport, ret); - goto metadata_err; - } - vport->ingress.offloads.metadata_allmatch_grp = g; - } - - kvfree(flow_group_in); - return 0; - -metadata_err: - if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) { - mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); - vport->ingress.offloads.metadata_prio_tag_grp = NULL; - } -prio_tag_err: - kvfree(flow_group_in); - return ret; -} - -static void esw_vport_destroy_ingress_acl_group(struct mlx5_vport *vport) -{ - if (vport->ingress.offloads.metadata_allmatch_grp) { - mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp); - vport->ingress.offloads.metadata_allmatch_grp = NULL; - } - - if (vport->ingress.offloads.metadata_prio_tag_grp) { - mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); - vport->ingress.offloads.metadata_prio_tag_grp = NULL; - } -} - -static int esw_vport_ingress_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int num_ftes = 0; - int err; - - if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && - !esw_check_ingress_prio_tag_enabled(esw, vport)) - return 0; - - esw_vport_cleanup_ingress_rules(esw, vport); - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - num_ftes++; - if (esw_check_ingress_prio_tag_enabled(esw, vport)) - num_ftes++; - - err = esw_vport_create_ingress_acl_table(esw, vport, num_ftes); - if (err) { - esw_warn(esw->dev, - "failed to enable ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - err = esw_vport_create_ingress_acl_group(esw, vport); - if (err) - goto group_err; - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); - if (err) - goto metadata_err; - } - - if (esw_check_ingress_prio_tag_enabled(esw, vport)) { - err = esw_vport_ingress_prio_tag_config(esw, vport); - if (err) - goto prio_tag_err; - } - return 0; - -prio_tag_err: - esw_vport_del_ingress_acl_modify_metadata(esw, vport); -metadata_err: - esw_vport_destroy_ingress_acl_group(vport); -group_err: - esw_vport_destroy_ingress_acl_table(vport); - return err; -} - -static int esw_vport_egress_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int err; - - if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) - return 0; - - esw_vport_cleanup_egress_rules(esw, vport); - - err = esw_vport_enable_egress_acl(esw, vport); - if (err) - return err; - - /* For prio tag mode, there is only 1 FTEs: - * 1) prio tag packets - pop the prio tag VLAN, allow - * Unmatched traffic is allowed by default - */ - esw_debug(esw->dev, - "vport[%d] configure prio tag egress rules\n", vport->vport); - - /* prio tag vlan rule - pop it so VF receives untagged packets */ - err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, 0, - MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | - MLX5_FLOW_CONTEXT_ACTION_ALLOW); - if (err) - esw_vport_disable_egress_acl(esw, vport); - - return err; -} - static bool esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) { @@ -2156,25 +1878,83 @@ static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw) esw_check_vport_match_metadata_supported(esw); } +u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) +{ + u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1; + u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); + u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + u32 start; + u32 end; + int id; + + /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ + WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); + + /* Trim vhca_id to ESW_VHCA_ID_BITS */ + vhca_id &= vhca_id_mask; + + start = (vhca_id << ESW_VPORT_BITS); + end = start + num_vports; + if (!vhca_id) + start += 1; /* zero is reserved/invalid metadata */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, start, end, GFP_KERNEL); + + return (id < 0) ? 0 : id; +} + +void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) +{ + ida_free(&esw->offloads.vport_metadata_ida, metadata); +} + +static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->vport == MLX5_VPORT_UPLINK) + return 0; + + vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + vport->metadata = vport->default_metadata; + return vport->metadata ? 0 : -ENOSPC; +} + +static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata) + return; + + WARN_ON(vport->metadata != vport->default_metadata); + mlx5_esw_match_metadata_free(esw, vport->default_metadata); +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int err; - err = esw_vport_ingress_config(esw, vport); + err = esw_offloads_vport_metadata_setup(esw, vport); if (err) - return err; + goto metadata_err; + + err = esw_acl_ingress_ofld_setup(esw, vport); + if (err) + goto ingress_err; if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { - err = esw_vport_egress_config(esw, vport); - if (err) { - esw_vport_cleanup_ingress_rules(esw, vport); - esw_vport_del_ingress_acl_modify_metadata(esw, vport); - esw_vport_destroy_ingress_acl_group(vport); - esw_vport_destroy_ingress_acl_table(vport); - } + err = esw_acl_egress_ofld_setup(esw, vport); + if (err) + goto egress_err; } + + return 0; + +egress_err: + esw_acl_ingress_ofld_cleanup(esw, vport); +ingress_err: + esw_offloads_vport_metadata_cleanup(esw, vport); +metadata_err: return err; } @@ -2182,11 +1962,9 @@ void esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - esw_vport_disable_egress_acl(esw, vport); - esw_vport_cleanup_ingress_rules(esw, vport); - esw_vport_del_ingress_acl_modify_metadata(esw, vport); - esw_vport_destroy_ingress_acl_group(vport); - esw_vport_destroy_ingress_acl_table(vport); + esw_acl_egress_ofld_cleanup(vport); + esw_acl_ingress_ofld_cleanup(esw, vport); + esw_offloads_vport_metadata_cleanup(esw, vport); } static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) @@ -2852,38 +2630,11 @@ EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { - u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0); - u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); - u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); - u32 val; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); - /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ - WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); - - /* Trim vhca_id to ESW_VHCA_ID_BITS */ - vhca_id &= vhca_id_mask; - - /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they - * don't overlap with VF numbers, and themselves, after trimming. - */ - WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) < - vport_num_mask - 1); - WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) < - vport_num_mask - 1); - WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) == - (MLX5_VPORT_ECPF & vport_num_mask)); - - /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't - * overlap with pf and ecpf. - */ - if (vport_num != MLX5_VPORT_UPLINK && - vport_num != MLX5_VPORT_ECPF) - WARN_ON_ONCE(vport_num >= vport_num_mask - 1); - - /* We can now trim vport_num to ESW_VPORT_BITS */ - vport_num &= vport_num_mask; + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; - val = (vhca_id << ESW_VPORT_BITS) | vport_num; - return val << (32 - ESW_SOURCE_PORT_METADATA_BITS); + return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 22a2ef111514..29b7339ebfa3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -194,8 +194,8 @@ static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd) MLX5_GET(tls_flow, flow, direction_sx)); } -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, - u64 rcd_sn) +int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, + u32 seq, __be64 rcd_sn) { struct mlx5_fpga_dma_buf *buf; int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h index 3b2e37bf76fe..5714cf391d1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h @@ -68,7 +68,7 @@ static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev) return mdev->fpga->tls->caps; } -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, - u64 rcd_sn); +int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, + u32 seq, __be64 rcd_sn); #endif /* __MLX5_FPGA_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 1a8e826ac86b..465a1076a477 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -781,6 +781,10 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); table_type = FS_FT_ESW_INGRESS_ACL; break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions); + table_type = FS_FT_RDMA_TX; + break; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4bebbecb078d..13e2fb79c21a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -254,7 +254,7 @@ static void del_sw_flow_group(struct fs_node *node); static void del_sw_fte(struct fs_node *node); static void del_sw_prio(struct fs_node *node); static void del_sw_ns(struct fs_node *node); -/* Delete rule (destination) is special case that +/* Delete rule (destination) is special case that * requires to lock the FTE for all the deletion process. */ static void del_sw_hw_rule(struct fs_node *node); @@ -379,6 +379,12 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } +static bool is_fwd_next_action(u32 action) +{ + return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); +} + static bool check_valid_spec(const struct mlx5_flow_spec *spec) { int i; @@ -499,7 +505,7 @@ static void del_sw_hw_rule(struct fs_node *node) fs_get_obj(rule, node); fs_get_obj(fte, rule->node.parent); trace_mlx5_fs_del_rule(rule); - if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (is_fwd_next_action(rule->sw_action)) { mutex_lock(&rule->dest_attr.ft->lock); list_del(&rule->next_ft); mutex_unlock(&rule->dest_attr.ft->lock); @@ -823,6 +829,36 @@ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) return find_closest_ft(prio, true); } +static struct fs_prio *find_fwd_ns_prio(struct mlx5_flow_root_namespace *root, + struct mlx5_flow_namespace *ns) +{ + struct mlx5_flow_namespace *root_ns = &root->ns; + struct fs_prio *iter_prio; + struct fs_prio *prio; + + fs_get_obj(prio, ns->node.parent); + list_for_each_entry(iter_prio, &root_ns->node.children, node.list) { + if (iter_prio == prio && + !list_is_last(&prio->node.children, &iter_prio->node.list)) + return list_next_entry(iter_prio, node.list); + } + return NULL; +} + +static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct fs_prio *prio; + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) + prio = find_fwd_ns_prio(root, ft->ns); + else + fs_get_obj(prio, ft->node.parent); + + return (prio) ? find_next_chained_ft(prio) : NULL; +} + static int connect_fts_in_prio(struct mlx5_core_dev *dev, struct fs_prio *prio, struct mlx5_flow_table *ft) @@ -973,6 +1009,10 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); mutex_unlock(&old_next_ft->lock); list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) && + iter->ft->ns == new_next_ft->ns) + continue; + err = _mlx5_modify_rule_destination(iter, &dest); if (err) pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", @@ -1074,6 +1114,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa next_ft = unmanaged ? ft_attr->next_ft : find_next_chained_ft(fs_prio); ft->def_miss_action = ns->def_miss_action; + ft->ns = ns; err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); if (err) goto free_ft; @@ -1898,48 +1939,59 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_root_namespace *root = find_root(&ft->node); static const struct mlx5_flow_spec zero_spec = {}; - struct mlx5_flow_destination gen_dest = {}; + struct mlx5_flow_destination *gen_dest = NULL; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; u32 sw_action = flow_act->action; - struct fs_prio *prio; + int i; if (!spec) spec = &zero_spec; - fs_get_obj(prio, ft->node.parent); - if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { - if (!fwd_next_prio_supported(ft)) - return ERR_PTR(-EOPNOTSUPP); - if (num_dest) - return ERR_PTR(-EINVAL); - mutex_lock(&root->chain_lock); - next_ft = find_next_chained_ft(prio); - if (next_ft) { - gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - gen_dest.ft = next_ft; - dest = &gen_dest; - num_dest = 1; - flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else { - mutex_unlock(&root->chain_lock); - return ERR_PTR(-EOPNOTSUPP); - } - } + if (!is_fwd_next_action(sw_action)) + return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); - handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); - if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { - if (!IS_ERR_OR_NULL(handle) && - (list_empty(&handle->rule[0]->next_ft))) { - mutex_lock(&next_ft->lock); - list_add(&handle->rule[0]->next_ft, - &next_ft->fwd_rules); - mutex_unlock(&next_ft->lock); - handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - } - mutex_unlock(&root->chain_lock); - } + mutex_lock(&root->chain_lock); + next_ft = find_next_fwd_ft(ft, flow_act); + if (!next_ft) { + handle = ERR_PTR(-EOPNOTSUPP); + goto unlock; + } + + gen_dest = kcalloc(num_dest + 1, sizeof(*dest), + GFP_KERNEL); + if (!gen_dest) { + handle = ERR_PTR(-ENOMEM); + goto unlock; + } + for (i = 0; i < num_dest; i++) + gen_dest[i] = dest[i]; + gen_dest[i].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest[i].ft = next_ft; + dest = gen_dest; + num_dest++; + flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + if (IS_ERR(handle)) + goto unlock; + + if (list_empty(&handle->rule[num_dest - 1]->next_ft)) { + mutex_lock(&next_ft->lock); + list_add(&handle->rule[num_dest - 1]->next_ft, + &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + handle->rule[num_dest - 1]->sw_action = sw_action; + handle->rule[num_dest - 1]->ft = ft; + } +unlock: + mutex_unlock(&root->chain_lock); + kfree(gen_dest); return handle; } EXPORT_SYMBOL(mlx5_add_flow_rules); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 508108c58dae..825b662f809b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -138,6 +138,7 @@ struct fs_node { struct mlx5_flow_rule { struct fs_node node; + struct mlx5_flow_table *ft; struct mlx5_flow_destination dest_attr; /* next_ft should be accessed under chain_lock and only of * destination type is FWD_NEXT_fT. @@ -175,6 +176,7 @@ struct mlx5_flow_table { u32 flags; struct rhltable fgs_hash; enum mlx5_flow_table_miss_action def_miss_action; + struct mlx5_flow_namespace *ns; }; struct mlx5_ft_underlay_qp { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 92f2395dd31a..09fe2d777550 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -177,6 +177,11 @@ static struct mlx5_profile profile[] = { #define FW_PRE_INIT_TIMEOUT_MILI 120000 #define FW_INIT_WARN_MESSAGE_INTERVAL 20000 +static int fw_initializing(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->initializing) >> 31; +} + static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, u32 warn_time_mili) { @@ -1272,7 +1277,7 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mlx5_debugfs_root); if (!priv->dbg_root) { dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n"); - return -ENOMEM; + goto err_dbg_root; } err = mlx5_health_init(dev); @@ -1289,15 +1294,27 @@ err_pagealloc_init: mlx5_health_cleanup(dev); err_health_init: debugfs_remove(dev->priv.dbg_root); - +err_dbg_root: + mutex_destroy(&priv->pgdir_mutex); + mutex_destroy(&priv->alloc_mutex); + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); return err; } static void mlx5_mdev_uninit(struct mlx5_core_dev *dev) { + struct mlx5_priv *priv = &dev->priv; + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); debugfs_remove_recursive(dev->priv.dbg_root); + mutex_destroy(&priv->pgdir_mutex); + mutex_destroy(&priv->alloc_mutex); + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); } #define MLX5_IB_MOD "mlx5_ib" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 554811de4c9d..df1363a34a42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1662,7 +1662,7 @@ dr_action_modify_check_field_limitation(struct mlx5dr_action *action, } static bool -dr_action_modify_check_is_ttl_modify(const u64 *sw_action) +dr_action_modify_check_is_ttl_modify(const void *sw_action) { u16 sw_field = MLX5_GET(set_action_in, sw_action, field); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 48b6358b6845..890767a2a7cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -297,7 +297,8 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) dmn->mdev = mdev; dmn->type = type; refcount_set(&dmn->refcount, 1); - mutex_init(&dmn->mutex); + mutex_init(&dmn->info.rx.mutex); + mutex_init(&dmn->info.tx.mutex); if (dr_domain_caps_init(mdev, dmn)) { mlx5dr_err(dmn, "Failed init domain, no caps\n"); @@ -345,9 +346,9 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) int ret = 0; if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { - mutex_lock(&dmn->mutex); + mlx5dr_domain_lock(dmn); ret = mlx5dr_send_ring_force_drain(dmn); - mutex_unlock(&dmn->mutex); + mlx5dr_domain_unlock(dmn); if (ret) { mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", flags, ret); @@ -371,7 +372,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) dr_domain_uninit_cache(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); - mutex_destroy(&dmn->mutex); + mutex_destroy(&dmn->info.tx.mutex); + mutex_destroy(&dmn->info.rx.mutex); kfree(dmn); return 0; } @@ -379,7 +381,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn) { - mutex_lock(&dmn->mutex); + mlx5dr_domain_lock(dmn); if (dmn->peer_dmn) refcount_dec(&dmn->peer_dmn->refcount); @@ -389,5 +391,5 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, if (dmn->peer_dmn) refcount_inc(&dmn->peer_dmn->refcount); - mutex_unlock(&dmn->mutex); + mlx5dr_domain_unlock(dmn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index a95938874798..31abcbb95ca2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -690,7 +690,7 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, refcount_set(&matcher->refcount, 1); INIT_LIST_HEAD(&matcher->matcher_list); - mutex_lock(&tbl->dmn->mutex); + mlx5dr_domain_lock(tbl->dmn); ret = dr_matcher_init(matcher, mask); if (ret) @@ -700,14 +700,14 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, if (ret) goto matcher_uninit; - mutex_unlock(&tbl->dmn->mutex); + mlx5dr_domain_unlock(tbl->dmn); return matcher; matcher_uninit: dr_matcher_uninit(matcher); free_matcher: - mutex_unlock(&tbl->dmn->mutex); + mlx5dr_domain_unlock(tbl->dmn); kfree(matcher); dec_ref: refcount_dec(&tbl->refcount); @@ -791,13 +791,13 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) if (refcount_read(&matcher->refcount) > 1) return -EBUSY; - mutex_lock(&tbl->dmn->mutex); + mlx5dr_domain_lock(tbl->dmn); dr_matcher_remove_from_tbl(matcher); dr_matcher_uninit(matcher); refcount_dec(&matcher->tbl->refcount); - mutex_unlock(&tbl->dmn->mutex); + mlx5dr_domain_unlock(tbl->dmn); kfree(matcher); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index cce3ee7a6614..cd708dcc2e3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -938,7 +938,10 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, struct mlx5dr_rule_rx_tx *nic_rule) { + mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn); dr_rule_clean_rule_members(rule, nic_rule); + mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn); + return 0; } @@ -1039,18 +1042,18 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param)) return 0; + hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); + if (!hw_ste_arr) + return -ENOMEM; + + mlx5dr_domain_nic_lock(nic_dmn); + ret = mlx5dr_matcher_select_builders(matcher, nic_matcher, dr_rule_get_ipv(¶m->outer), dr_rule_get_ipv(¶m->inner)); if (ret) - goto out_err; - - hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); - if (!hw_ste_arr) { - ret = -ENOMEM; - goto out_err; - } + goto free_hw_ste; /* Set the tag values inside the ste array */ ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); @@ -1115,6 +1118,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, if (htbl) mlx5dr_htbl_put(htbl); + mlx5dr_domain_nic_unlock(nic_dmn); + kfree(hw_ste_arr); return 0; @@ -1129,8 +1134,8 @@ free_rule: kfree(ste_info); } free_hw_ste: + mlx5dr_domain_nic_unlock(nic_dmn); kfree(hw_ste_arr); -out_err: return ret; } @@ -1232,31 +1237,23 @@ struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, { struct mlx5dr_rule *rule; - mutex_lock(&matcher->tbl->dmn->mutex); refcount_inc(&matcher->refcount); rule = dr_rule_create_rule(matcher, value, num_actions, actions); if (!rule) refcount_dec(&matcher->refcount); - mutex_unlock(&matcher->tbl->dmn->mutex); - return rule; } int mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { struct mlx5dr_matcher *matcher = rule->matcher; - struct mlx5dr_table *tbl = rule->matcher->tbl; int ret; - mutex_lock(&tbl->dmn->mutex); - ret = dr_rule_destroy_rule(rule); - - mutex_unlock(&tbl->dmn->mutex); - if (!ret) refcount_dec(&matcher->refcount); + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index b8d97d44be7b..f421013b0b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -357,9 +357,11 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, u32 buff_offset; int ret; + spin_lock(&send_ring->lock); + ret = dr_handle_pending_wc(dmn, send_ring); if (ret) - return ret; + goto out_unlock; if (send_info->write.length > dmn->info.max_inline_size) { buff_offset = (send_ring->tx_head & @@ -377,7 +379,9 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, dr_fill_data_segs(send_ring, send_info); dr_post_send(send_ring->qp, send_info); - return 0; +out_unlock: + spin_unlock(&send_ring->lock); + return ret; } static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, @@ -563,9 +567,7 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, send_info.remote_addr = action->rewrite.chunk->mr_addr; send_info.rkey = action->rewrite.chunk->rkey; - mutex_lock(&dmn->mutex); ret = dr_postsend_icm_data(dmn, &send_info); - mutex_unlock(&dmn->mutex); return ret; } @@ -886,6 +888,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) init_attr.pdn = dmn->pdn; init_attr.uar = dmn->uar; init_attr.max_send_wr = QUEUE_SIZE; + spin_lock_init(&dmn->send_ring->lock); dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); if (!dmn->send_ring->qp) { @@ -990,7 +993,9 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) return ret; } + spin_lock(&send_ring->lock); ret = dr_handle_pending_wc(dmn, send_ring); + spin_unlock(&send_ring->lock); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index c0e3a1e7389d..00c2f598f034 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -112,7 +112,7 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length) { u32 crc = crc32(0, input_data, length); - return htonl(crc); + return (__force u32)htonl(crc); } u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) @@ -869,7 +869,7 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec) { - u32 raw_ip[4]; + __be32 raw_ip[4]; spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16); @@ -961,7 +961,6 @@ static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec) spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1); spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0); spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a); - spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b); } static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index c2fe48d7b75a..b599b6beb5b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -14,7 +14,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; - mutex_lock(&tbl->dmn->mutex); + mlx5dr_domain_lock(tbl->dmn); if (!list_empty(&tbl->matcher_list)) last_matcher = list_last_entry(&tbl->matcher_list, @@ -78,7 +78,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, refcount_inc(&action->refcount); out: - mutex_unlock(&tbl->dmn->mutex); + mlx5dr_domain_unlock(tbl->dmn); return ret; } @@ -95,7 +95,7 @@ static void dr_table_uninit_fdb(struct mlx5dr_table *tbl) static void dr_table_uninit(struct mlx5dr_table *tbl) { - mutex_lock(&tbl->dmn->mutex); + mlx5dr_domain_lock(tbl->dmn); switch (tbl->dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: @@ -112,7 +112,7 @@ static void dr_table_uninit(struct mlx5dr_table *tbl) break; } - mutex_unlock(&tbl->dmn->mutex); + mlx5dr_domain_unlock(tbl->dmn); } static int dr_table_init_nic(struct mlx5dr_domain *dmn, @@ -177,7 +177,7 @@ static int dr_table_init(struct mlx5dr_table *tbl) INIT_LIST_HEAD(&tbl->matcher_list); - mutex_lock(&tbl->dmn->mutex); + mlx5dr_domain_lock(tbl->dmn); switch (tbl->dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: @@ -201,7 +201,7 @@ static int dr_table_init(struct mlx5dr_table *tbl) break; } - mutex_unlock(&tbl->dmn->mutex); + mlx5dr_domain_unlock(tbl->dmn); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 984783238baa..0883956c58c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -554,8 +554,7 @@ struct mlx5dr_match_misc2 { u32 metadata_reg_c_1; /* metadata_reg_c_1 */ u32 metadata_reg_c_0; /* metadata_reg_c_0 */ u32 metadata_reg_a; /* metadata_reg_a */ - u32 metadata_reg_b; /* metadata_reg_b */ - u8 reserved_auto2[8]; + u8 reserved_auto2[12]; }; struct mlx5dr_match_misc3 { @@ -636,6 +635,7 @@ struct mlx5dr_domain_rx_tx { u64 drop_icm_addr; u64 default_icm_addr; enum mlx5dr_ste_entry_type ste_type; + struct mutex mutex; /* protect rx/tx domain */ }; struct mlx5dr_domain_info { @@ -660,7 +660,6 @@ struct mlx5dr_domain { struct mlx5_uars_page *uar; enum mlx5dr_domain_type type; refcount_t refcount; - struct mutex mutex; /* protect domain */ struct mlx5dr_icm_pool *ste_icm_pool; struct mlx5dr_icm_pool *action_icm_pool; struct mlx5dr_send_ring *send_ring; @@ -814,6 +813,28 @@ struct mlx5dr_icm_chunk { struct list_head *miss_list; }; +static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn) +{ + mutex_lock(&nic_dmn->mutex); +} + +static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn) +{ + mutex_unlock(&nic_dmn->mutex); +} + +static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn) +{ + mlx5dr_domain_nic_lock(&dmn->info.rx); + mlx5dr_domain_nic_lock(&dmn->info.tx); +} + +static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) +{ + mlx5dr_domain_nic_unlock(&dmn->info.tx); + mlx5dr_domain_nic_unlock(&dmn->info.rx); +} + static inline int mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps) { @@ -1043,6 +1064,7 @@ struct mlx5dr_send_ring { struct ib_wc wc[MAX_SEND_CQE]; u8 sync_buff[MIN_READ_SYNC]; struct mlx5dr_mr *sync_mr; + spinlock_t lock; /* Protect the data path of the send ring */ }; int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig index b9c4d48e28e4..09f35209d43d 100644 --- a/drivers/net/ethernet/micrel/Kconfig +++ b/drivers/net/ethernet/micrel/Kconfig @@ -38,6 +38,8 @@ config KS8851_MLL tristate "Micrel KS8851 MLL" depends on HAS_IOMEM select MII + select CRC32 + select EEPROM_93CX6 ---help--- This platform driver is for Micrel KS8851 Address/data bus multiplexed network chip. diff --git a/drivers/net/ethernet/micrel/Makefile b/drivers/net/ethernet/micrel/Makefile index 6d8ac5527aef..5cc00d22c708 100644 --- a/drivers/net/ethernet/micrel/Makefile +++ b/drivers/net/ethernet/micrel/Makefile @@ -5,5 +5,7 @@ obj-$(CONFIG_KS8842) += ks8842.o obj-$(CONFIG_KS8851) += ks8851.o +ks8851-objs = ks8851_common.o ks8851_spi.o obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o +ks8851_mll-objs = ks8851_common.o ks8851_par.o obj-$(CONFIG_KSZ884X_PCI) += ksz884x.o diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index 8f834aef8e32..2b319e451121 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -7,6 +7,11 @@ * KS8851 register definitions */ +#ifndef __KS8851_H__ +#define __KS8851_H__ + +#include <linux/eeprom_93cx6.h> + #define KS_CCR 0x08 #define CCR_LE (1 << 10) /* KSZ8851-16MLL */ #define CCR_EEPROM (1 << 9) @@ -19,7 +24,7 @@ #define CCR_32PIN (1 << 0) /* KSZ8851SNL */ /* MAC address registers */ -#define KS_MAR(_m) (0x15 - (_m)) +#define KS_MAR(_m) (0x14 - (_m)) #define KS_MARL 0x10 #define KS_MARM 0x12 #define KS_MARH 0x14 @@ -300,3 +305,147 @@ #define TXFR_TXIC (1 << 15) #define TXFR_TXFID_MASK (0x3f << 0) #define TXFR_TXFID_SHIFT (0) + +/** + * struct ks8851_rxctrl - KS8851 driver rx control + * @mchash: Multicast hash-table data. + * @rxcr1: KS_RXCR1 register setting + * @rxcr2: KS_RXCR2 register setting + * + * Representation of the settings needs to control the receive filtering + * such as the multicast hash-filter and the receive register settings. This + * is used to make the job of working out if the receive settings change and + * then issuing the new settings to the worker that will send the necessary + * commands. + */ +struct ks8851_rxctrl { + u16 mchash[4]; + u16 rxcr1; + u16 rxcr2; +}; + +/** + * union ks8851_tx_hdr - tx header data + * @txb: The header as bytes + * @txw: The header as 16bit, little-endian words + * + * A dual representation of the tx header data to allow + * access to individual bytes, and to allow 16bit accesses + * with 16bit alignment. + */ +union ks8851_tx_hdr { + u8 txb[6]; + __le16 txw[3]; +}; + +/** + * struct ks8851_net - KS8851 driver private data + * @netdev: The network device we're bound to + * @statelock: Lock on this structure for tx list. + * @mii: The MII state information for the mii calls. + * @rxctrl: RX settings for @rxctrl_work. + * @rxctrl_work: Work queue for updating RX mode and multicast lists + * @txq: Queue of packets for transmission. + * @txh: Space for generating packet TX header in DMA-able data + * @rxd: Space for receiving SPI data, in DMA-able space. + * @txd: Space for transmitting SPI data, in DMA-able space. + * @msg_enable: The message flags controlling driver output (see ethtool). + * @fid: Incrementing frame id tag. + * @rc_ier: Cached copy of KS_IER. + * @rc_ccr: Cached copy of KS_CCR. + * @rc_rxqcr: Cached copy of KS_RXQCR. + * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM. + * @vdd_reg: Optional regulator supplying the chip + * @vdd_io: Optional digital power supply for IO + * @gpio: Optional reset_n gpio + * @lock: Bus access lock callback + * @unlock: Bus access unlock callback + * @rdreg16: 16bit register read callback + * @wrreg16: 16bit register write callback + * @rdfifo: FIFO read callback + * @wrfifo: FIFO write callback + * @start_xmit: start_xmit() implementation callback + * @rx_skb: rx_skb() implementation callback + * @flush_tx_work: flush_tx_work() implementation callback + * + * The @statelock is used to protect information in the structure which may + * need to be accessed via several sources, such as the network driver layer + * or one of the work queues. + * + * We align the buffers we may use for rx/tx to ensure that if the SPI driver + * wants to DMA map them, it will not have any problems with data the driver + * modifies. + */ +struct ks8851_net { + struct net_device *netdev; + spinlock_t statelock; + + union ks8851_tx_hdr txh ____cacheline_aligned; + u8 rxd[8]; + u8 txd[8]; + + u32 msg_enable ____cacheline_aligned; + u16 tx_space; + u8 fid; + + u16 rc_ier; + u16 rc_rxqcr; + u16 rc_ccr; + + struct mii_if_info mii; + struct ks8851_rxctrl rxctrl; + + struct work_struct rxctrl_work; + + struct sk_buff_head txq; + + struct eeprom_93cx6 eeprom; + struct regulator *vdd_reg; + struct regulator *vdd_io; + int gpio; + + void (*lock)(struct ks8851_net *ks, + unsigned long *flags); + void (*unlock)(struct ks8851_net *ks, + unsigned long *flags); + unsigned int (*rdreg16)(struct ks8851_net *ks, + unsigned int reg); + void (*wrreg16)(struct ks8851_net *ks, + unsigned int reg, unsigned int val); + void (*rdfifo)(struct ks8851_net *ks, u8 *buff, + unsigned int len); + void (*wrfifo)(struct ks8851_net *ks, + struct sk_buff *txp, bool irq); + netdev_tx_t (*start_xmit)(struct sk_buff *skb, + struct net_device *dev); + void (*rx_skb)(struct ks8851_net *ks, + struct sk_buff *skb); + void (*flush_tx_work)(struct ks8851_net *ks); +}; + +int ks8851_probe_common(struct net_device *netdev, struct device *dev, + int msg_en); +int ks8851_remove_common(struct device *dev); +int ks8851_suspend(struct device *dev); +int ks8851_resume(struct device *dev); + +static __maybe_unused SIMPLE_DEV_PM_OPS(ks8851_pm_ops, + ks8851_suspend, ks8851_resume); + +/** + * ks8851_done_tx - update and then free skbuff after transmitting + * @ks: The device state + * @txb: The buffer transmitted + */ +static void __maybe_unused ks8851_done_tx(struct ks8851_net *ks, + struct sk_buff *txb) +{ + struct net_device *dev = ks->netdev; + + dev->stats.tx_bytes += txb->len; + dev->stats.tx_packets++; + + dev_kfree_skb(txb); +} + +#endif /* __KS8851_H__ */ diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851_common.c index 33305c9c5a62..d65872172229 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -19,10 +19,8 @@ #include <linux/cache.h> #include <linux/crc32.h> #include <linux/mii.h> -#include <linux/eeprom_93cx6.h> #include <linux/regulator/consumer.h> -#include <linux/spi/spi.h> #include <linux/gpio.h> #include <linux/of_gpio.h> #include <linux/of_net.h> @@ -30,255 +28,41 @@ #include "ks8851.h" /** - * struct ks8851_rxctrl - KS8851 driver rx control - * @mchash: Multicast hash-table data. - * @rxcr1: KS_RXCR1 register setting - * @rxcr2: KS_RXCR2 register setting - * - * Representation of the settings needs to control the receive filtering - * such as the multicast hash-filter and the receive register settings. This - * is used to make the job of working out if the receive settings change and - * then issuing the new settings to the worker that will send the necessary - * commands. - */ -struct ks8851_rxctrl { - u16 mchash[4]; - u16 rxcr1; - u16 rxcr2; -}; - -/** - * union ks8851_tx_hdr - tx header data - * @txb: The header as bytes - * @txw: The header as 16bit, little-endian words - * - * A dual representation of the tx header data to allow - * access to individual bytes, and to allow 16bit accesses - * with 16bit alignment. - */ -union ks8851_tx_hdr { - u8 txb[6]; - __le16 txw[3]; -}; - -/** - * struct ks8851_net - KS8851 driver private data - * @netdev: The network device we're bound to - * @spidev: The spi device we're bound to. - * @lock: Lock to ensure that the device is not accessed when busy. - * @statelock: Lock on this structure for tx list. - * @mii: The MII state information for the mii calls. - * @rxctrl: RX settings for @rxctrl_work. - * @tx_work: Work queue for tx packets - * @rxctrl_work: Work queue for updating RX mode and multicast lists - * @txq: Queue of packets for transmission. - * @spi_msg1: pre-setup SPI transfer with one message, @spi_xfer1. - * @spi_msg2: pre-setup SPI transfer with two messages, @spi_xfer2. - * @txh: Space for generating packet TX header in DMA-able data - * @rxd: Space for receiving SPI data, in DMA-able space. - * @txd: Space for transmitting SPI data, in DMA-able space. - * @msg_enable: The message flags controlling driver output (see ethtool). - * @fid: Incrementing frame id tag. - * @rc_ier: Cached copy of KS_IER. - * @rc_ccr: Cached copy of KS_CCR. - * @rc_rxqcr: Cached copy of KS_RXQCR. - * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM. - * @vdd_reg: Optional regulator supplying the chip - * @vdd_io: Optional digital power supply for IO - * @gpio: Optional reset_n gpio - * - * The @lock ensures that the chip is protected when certain operations are - * in progress. When the read or write packet transfer is in progress, most - * of the chip registers are not ccessible until the transfer is finished and - * the DMA has been de-asserted. - * - * The @statelock is used to protect information in the structure which may - * need to be accessed via several sources, such as the network driver layer - * or one of the work queues. - * - * We align the buffers we may use for rx/tx to ensure that if the SPI driver - * wants to DMA map them, it will not have any problems with data the driver - * modifies. - */ -struct ks8851_net { - struct net_device *netdev; - struct spi_device *spidev; - struct mutex lock; - spinlock_t statelock; - - union ks8851_tx_hdr txh ____cacheline_aligned; - u8 rxd[8]; - u8 txd[8]; - - u32 msg_enable ____cacheline_aligned; - u16 tx_space; - u8 fid; - - u16 rc_ier; - u16 rc_rxqcr; - u16 rc_ccr; - - struct mii_if_info mii; - struct ks8851_rxctrl rxctrl; - - struct work_struct tx_work; - struct work_struct rxctrl_work; - - struct sk_buff_head txq; - - struct spi_message spi_msg1; - struct spi_message spi_msg2; - struct spi_transfer spi_xfer1; - struct spi_transfer spi_xfer2[2]; - - struct eeprom_93cx6 eeprom; - struct regulator *vdd_reg; - struct regulator *vdd_io; - int gpio; -}; - -static int msg_enable; - -/* SPI frame opcodes */ -#define KS_SPIOP_RD (0x00) -#define KS_SPIOP_WR (0x40) -#define KS_SPIOP_RXFIFO (0x80) -#define KS_SPIOP_TXFIFO (0xC0) - -/* shift for byte-enable data */ -#define BYTE_EN(_x) ((_x) << 2) - -/* turn register number and byte-enable mask into data for start of packet */ -#define MK_OP(_byteen, _reg) (BYTE_EN(_byteen) | (_reg) << (8+2) | (_reg) >> 6) - -/* SPI register read/write calls. + * ks8851_lock - register access lock + * @ks: The chip state + * @flags: Spinlock flags * - * All these calls issue SPI transactions to access the chip's registers. They - * all require that the necessary lock is held to prevent accesses when the - * chip is busy transferring packet data (RX/TX FIFO accesses). + * Claim chip register access lock */ +static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags) +{ + ks->lock(ks, flags); +} /** - * ks8851_wrreg16 - write 16bit register value to chip + * ks8851_unlock - register access unlock * @ks: The chip state - * @reg: The register address - * @val: The value to write + * @flags: Spinlock flags * - * Issue a write to put the value @val into the register specified in @reg. + * Release chip register access lock */ -static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val) +static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags) { - struct spi_transfer *xfer = &ks->spi_xfer1; - struct spi_message *msg = &ks->spi_msg1; - __le16 txb[2]; - int ret; - - txb[0] = cpu_to_le16(MK_OP(reg & 2 ? 0xC : 0x03, reg) | KS_SPIOP_WR); - txb[1] = cpu_to_le16(val); - - xfer->tx_buf = txb; - xfer->rx_buf = NULL; - xfer->len = 4; - - ret = spi_sync(ks->spidev, msg); - if (ret < 0) - netdev_err(ks->netdev, "spi_sync() failed\n"); + ks->unlock(ks, flags); } /** - * ks8851_wrreg8 - write 8bit register value to chip + * ks8851_wrreg16 - write 16bit register value to chip * @ks: The chip state * @reg: The register address * @val: The value to write * * Issue a write to put the value @val into the register specified in @reg. */ -static void ks8851_wrreg8(struct ks8851_net *ks, unsigned reg, unsigned val) -{ - struct spi_transfer *xfer = &ks->spi_xfer1; - struct spi_message *msg = &ks->spi_msg1; - __le16 txb[2]; - int ret; - int bit; - - bit = 1 << (reg & 3); - - txb[0] = cpu_to_le16(MK_OP(bit, reg) | KS_SPIOP_WR); - txb[1] = val; - - xfer->tx_buf = txb; - xfer->rx_buf = NULL; - xfer->len = 3; - - ret = spi_sync(ks->spidev, msg); - if (ret < 0) - netdev_err(ks->netdev, "spi_sync() failed\n"); -} - -/** - * ks8851_rdreg - issue read register command and return the data - * @ks: The device state - * @op: The register address and byte enables in message format. - * @rxb: The RX buffer to return the result into - * @rxl: The length of data expected. - * - * This is the low level read call that issues the necessary spi message(s) - * to read data from the register specified in @op. - */ -static void ks8851_rdreg(struct ks8851_net *ks, unsigned op, - u8 *rxb, unsigned rxl) -{ - struct spi_transfer *xfer; - struct spi_message *msg; - __le16 *txb = (__le16 *)ks->txd; - u8 *trx = ks->rxd; - int ret; - - txb[0] = cpu_to_le16(op | KS_SPIOP_RD); - - if (ks->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) { - msg = &ks->spi_msg2; - xfer = ks->spi_xfer2; - - xfer->tx_buf = txb; - xfer->rx_buf = NULL; - xfer->len = 2; - - xfer++; - xfer->tx_buf = NULL; - xfer->rx_buf = trx; - xfer->len = rxl; - } else { - msg = &ks->spi_msg1; - xfer = &ks->spi_xfer1; - - xfer->tx_buf = txb; - xfer->rx_buf = trx; - xfer->len = rxl + 2; - } - - ret = spi_sync(ks->spidev, msg); - if (ret < 0) - netdev_err(ks->netdev, "read: spi_sync() failed\n"); - else if (ks->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) - memcpy(rxb, trx, rxl); - else - memcpy(rxb, trx + 2, rxl); -} - -/** - * ks8851_rdreg8 - read 8 bit register from device - * @ks: The chip information - * @reg: The register address - * - * Read a 8bit register from the chip, returning the result -*/ -static unsigned ks8851_rdreg8(struct ks8851_net *ks, unsigned reg) +static void ks8851_wrreg16(struct ks8851_net *ks, unsigned int reg, + unsigned int val) { - u8 rxb[1]; - - ks8851_rdreg(ks, MK_OP(1 << (reg & 3), reg), rxb, 1); - return rxb[0]; + ks->wrreg16(ks, reg, val); } /** @@ -287,32 +71,11 @@ static unsigned ks8851_rdreg8(struct ks8851_net *ks, unsigned reg) * @reg: The register address * * Read a 16bit register from the chip, returning the result -*/ -static unsigned ks8851_rdreg16(struct ks8851_net *ks, unsigned reg) -{ - __le16 rx = 0; - - ks8851_rdreg(ks, MK_OP(reg & 2 ? 0xC : 0x3, reg), (u8 *)&rx, 2); - return le16_to_cpu(rx); -} - -/** - * ks8851_rdreg32 - read 32 bit register from device - * @ks: The chip information - * @reg: The register address - * - * Read a 32bit register from the chip. - * - * Note, this read requires the address be aligned to 4 bytes. -*/ -static unsigned ks8851_rdreg32(struct ks8851_net *ks, unsigned reg) + */ +static unsigned int ks8851_rdreg16(struct ks8851_net *ks, + unsigned int reg) { - __le32 rx = 0; - - WARN_ON(reg & 3); - - ks8851_rdreg(ks, MK_OP(0xf, reg), (u8 *)&rx, 4); - return le32_to_cpu(rx); + return ks->rdreg16(ks, reg); } /** @@ -368,21 +131,27 @@ static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode) static int ks8851_write_mac_addr(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + unsigned long flags; + u16 val; int i; - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); /* * Wake up chip in case it was powered off when stopped; otherwise, * the first write to the MAC address does not take effect. */ ks8851_set_powermode(ks, PMECR_PM_NORMAL); - for (i = 0; i < ETH_ALEN; i++) - ks8851_wrreg8(ks, KS_MAR(i), dev->dev_addr[i]); + + for (i = 0; i < ETH_ALEN; i += 2) { + val = (dev->dev_addr[i] << 8) | dev->dev_addr[i + 1]; + ks8851_wrreg16(ks, KS_MAR(i), val); + } + if (!netif_running(dev)) ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); return 0; } @@ -396,19 +165,25 @@ static int ks8851_write_mac_addr(struct net_device *dev) static void ks8851_read_mac_addr(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + unsigned long flags; + u16 reg; int i; - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = ks8851_rdreg8(ks, KS_MAR(i)); + for (i = 0; i < ETH_ALEN; i += 2) { + reg = ks8851_rdreg16(ks, KS_MAR(i)); + dev->dev_addr[i] = reg >> 8; + dev->dev_addr[i + 1] = reg & 0xff; + } - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); } /** * ks8851_init_mac - initialise the mac address * @ks: The device structure + * @np: The device node pointer * * Get or create the initial mac address for the device and then set that * into the station address register. A mac address supplied in the device @@ -416,12 +191,12 @@ static void ks8851_read_mac_addr(struct net_device *dev) * we try that. If no valid mac address is found we use eth_random_addr() * to create a new one. */ -static void ks8851_init_mac(struct ks8851_net *ks) +static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np) { struct net_device *dev = ks->netdev; const u8 *mac_addr; - mac_addr = of_get_mac_address(ks->spidev->dev.of_node); + mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) { ether_addr_copy(dev->dev_addr, mac_addr); ks8851_write_mac_addr(dev); @@ -442,48 +217,12 @@ static void ks8851_init_mac(struct ks8851_net *ks) } /** - * ks8851_rdfifo - read data from the receive fifo - * @ks: The device state. - * @buff: The buffer address - * @len: The length of the data to read - * - * Issue an RXQ FIFO read command and read the @len amount of data from - * the FIFO into the buffer specified by @buff. - */ -static void ks8851_rdfifo(struct ks8851_net *ks, u8 *buff, unsigned len) -{ - struct spi_transfer *xfer = ks->spi_xfer2; - struct spi_message *msg = &ks->spi_msg2; - u8 txb[1]; - int ret; - - netif_dbg(ks, rx_status, ks->netdev, - "%s: %d@%p\n", __func__, len, buff); - - /* set the operation we're issuing */ - txb[0] = KS_SPIOP_RXFIFO; - - xfer->tx_buf = txb; - xfer->rx_buf = NULL; - xfer->len = 1; - - xfer++; - xfer->rx_buf = buff; - xfer->tx_buf = NULL; - xfer->len = len; - - ret = spi_sync(ks->spidev, msg); - if (ret < 0) - netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__); -} - -/** * ks8851_dbg_dumpkkt - dump initial packet contents to debug * @ks: The device state * @rxpkt: The data for the received packet * * Dump the initial data from the packet to dev_dbg(). -*/ + */ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) { netdev_dbg(ks->netdev, @@ -494,6 +233,16 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) } /** + * ks8851_rx_skb - receive skbuff + * @ks: The device state. + * @skb: The skbuff + */ +static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb) +{ + ks->rx_skb(ks, skb); +} + +/** * ks8851_rx_pkts - receive packets from the host * @ks: The device information. * @@ -507,10 +256,9 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) unsigned rxfc; unsigned rxlen; unsigned rxstat; - u32 rxh; u8 *rxpkt; - rxfc = ks8851_rdreg8(ks, KS_RXFC); + rxfc = (ks8851_rdreg16(ks, KS_RXFCTR) >> 8) & 0xff; netif_dbg(ks, rx_status, ks->netdev, "%s: %d packets\n", __func__, rxfc); @@ -526,9 +274,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) */ for (; rxfc != 0; rxfc--) { - rxh = ks8851_rdreg32(ks, KS_RXFHSR); - rxstat = rxh & 0xffff; - rxlen = (rxh >> 16) & 0xfff; + rxstat = ks8851_rdreg16(ks, KS_RXFHSR); + rxlen = ks8851_rdreg16(ks, KS_RXFHBCR) & RXFHBCR_CNT_MASK; netif_dbg(ks, rx_status, ks->netdev, "rx: stat 0x%04x, len 0x%04x\n", rxstat, rxlen); @@ -557,13 +304,13 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) rxpkt = skb_put(skb, rxlen) - 8; - ks8851_rdfifo(ks, rxpkt, rxalign + 8); + ks->rdfifo(ks, rxpkt, rxalign + 8); if (netif_msg_pktdata(ks)) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - netif_rx_ni(skb); + ks8851_rx_skb(ks, skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; @@ -590,10 +337,11 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) static irqreturn_t ks8851_irq(int irq, void *_ks) { struct ks8851_net *ks = _ks; - unsigned status; unsigned handled = 0; + unsigned long flags; + unsigned int status; - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); status = ks8851_rdreg16(ks, KS_ISR); @@ -631,7 +379,7 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) handled |= IRQ_RXI; if (status & IRQ_SPIBEI) { - dev_err(&ks->spidev->dev, "%s: spi bus error\n", __func__); + netdev_err(ks->netdev, "%s: spi bus error\n", __func__); handled |= IRQ_SPIBEI; } @@ -662,7 +410,7 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1); } - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); if (status & IRQ_LCI) mii_check_link(&ks->mii); @@ -674,108 +422,13 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) } /** - * calc_txlen - calculate size of message to send packet - * @len: Length of data - * - * Returns the size of the TXFIFO message needed to send - * this packet. - */ -static inline unsigned calc_txlen(unsigned len) -{ - return ALIGN(len + 4, 4); -} - -/** - * ks8851_wrpkt - write packet to TX FIFO - * @ks: The device state. - * @txp: The sk_buff to transmit. - * @irq: IRQ on completion of the packet. - * - * Send the @txp to the chip. This means creating the relevant packet header - * specifying the length of the packet and the other information the chip - * needs, such as IRQ on completion. Send the header and the packet data to - * the device. - */ -static void ks8851_wrpkt(struct ks8851_net *ks, struct sk_buff *txp, bool irq) -{ - struct spi_transfer *xfer = ks->spi_xfer2; - struct spi_message *msg = &ks->spi_msg2; - unsigned fid = 0; - int ret; - - netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p, irq %d\n", - __func__, txp, txp->len, txp->data, irq); - - fid = ks->fid++; - fid &= TXFR_TXFID_MASK; - - if (irq) - fid |= TXFR_TXIC; /* irq on completion */ - - /* start header at txb[1] to align txw entries */ - ks->txh.txb[1] = KS_SPIOP_TXFIFO; - ks->txh.txw[1] = cpu_to_le16(fid); - ks->txh.txw[2] = cpu_to_le16(txp->len); - - xfer->tx_buf = &ks->txh.txb[1]; - xfer->rx_buf = NULL; - xfer->len = 5; - - xfer++; - xfer->tx_buf = txp->data; - xfer->rx_buf = NULL; - xfer->len = ALIGN(txp->len, 4); - - ret = spi_sync(ks->spidev, msg); - if (ret < 0) - netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__); -} - -/** - * ks8851_done_tx - update and then free skbuff after transmitting + * ks8851_flush_tx_work - flush outstanding TX work * @ks: The device state - * @txb: The buffer transmitted - */ -static void ks8851_done_tx(struct ks8851_net *ks, struct sk_buff *txb) -{ - struct net_device *dev = ks->netdev; - - dev->stats.tx_bytes += txb->len; - dev->stats.tx_packets++; - - dev_kfree_skb(txb); -} - -/** - * ks8851_tx_work - process tx packet(s) - * @work: The work strucutre what was scheduled. - * - * This is called when a number of packets have been scheduled for - * transmission and need to be sent to the device. */ -static void ks8851_tx_work(struct work_struct *work) +static void ks8851_flush_tx_work(struct ks8851_net *ks) { - struct ks8851_net *ks = container_of(work, struct ks8851_net, tx_work); - struct sk_buff *txb; - bool last = skb_queue_empty(&ks->txq); - - mutex_lock(&ks->lock); - - while (!last) { - txb = skb_dequeue(&ks->txq); - last = skb_queue_empty(&ks->txq); - - if (txb != NULL) { - ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); - ks8851_wrpkt(ks, txb, last); - ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); - ks8851_wrreg16(ks, KS_TXQCR, TXQCR_METFE); - - ks8851_done_tx(ks, txb); - } - } - - mutex_unlock(&ks->lock); + if (ks->flush_tx_work) + ks->flush_tx_work(ks); } /** @@ -788,6 +441,7 @@ static void ks8851_tx_work(struct work_struct *work) static int ks8851_net_open(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + unsigned long flags; int ret; ret = request_threaded_irq(dev->irq, NULL, ks8851_irq, @@ -800,7 +454,7 @@ static int ks8851_net_open(struct net_device *dev) /* lock the card, even if we may not actually be doing anything * else at the moment */ - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); netif_dbg(ks, ifup, ks->netdev, "opening\n"); @@ -844,23 +498,14 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* clear then enable interrupts */ - -#define STD_IRQ (IRQ_LCI | /* Link Change */ \ - IRQ_TXI | /* TX done */ \ - IRQ_RXI | /* RX done */ \ - IRQ_SPIBEI | /* SPI bus error */ \ - IRQ_TXPSI | /* TX process stop */ \ - IRQ_RXPSI) /* RX process stop */ - - ks->rc_ier = STD_IRQ; - ks8851_wrreg16(ks, KS_ISR, STD_IRQ); - ks8851_wrreg16(ks, KS_IER, STD_IRQ); + ks8851_wrreg16(ks, KS_ISR, ks->rc_ier); + ks8851_wrreg16(ks, KS_IER, ks->rc_ier); netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); mii_check_link(&ks->mii); return 0; } @@ -876,22 +521,23 @@ static int ks8851_net_open(struct net_device *dev) static int ks8851_net_stop(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + unsigned long flags; netif_info(ks, ifdown, dev, "shutting down\n"); netif_stop_queue(dev); - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); /* turn off the IRQs and ack any outstanding */ ks8851_wrreg16(ks, KS_IER, 0x0000); ks8851_wrreg16(ks, KS_ISR, 0xffff); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); /* stop any outstanding work */ - flush_work(&ks->tx_work); + ks8851_flush_tx_work(ks); flush_work(&ks->rxctrl_work); - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); /* shutdown RX process */ ks8851_wrreg16(ks, KS_RXCR1, 0x0000); @@ -900,7 +546,7 @@ static int ks8851_net_stop(struct net_device *dev) /* set powermode to soft power down to save power */ ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); /* ensure any queued tx buffers are dumped */ while (!skb_queue_empty(&ks->txq)) { @@ -934,26 +580,8 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); - unsigned needed = calc_txlen(skb->len); - netdev_tx_t ret = NETDEV_TX_OK; - - netif_dbg(ks, tx_queued, ks->netdev, - "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data); - spin_lock(&ks->statelock); - - if (needed > ks->tx_space) { - netif_stop_queue(dev); - ret = NETDEV_TX_BUSY; - } else { - ks->tx_space -= needed; - skb_queue_tail(&ks->txq, skb); - } - - spin_unlock(&ks->statelock); - schedule_work(&ks->tx_work); - - return ret; + return ks->start_xmit(skb, dev); } /** @@ -972,13 +600,14 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb, static void ks8851_rxctrl_work(struct work_struct *work) { struct ks8851_net *ks = container_of(work, struct ks8851_net, rxctrl_work); + unsigned long flags; - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); /* need to shutdown RXQ before modifying filter parameters */ ks8851_wrreg16(ks, KS_RXCR1, 0x00); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); } static void ks8851_set_rx_mode(struct net_device *dev) @@ -1160,11 +789,6 @@ static void ks8851_eeprom_regwrite(struct eeprom_93cx6 *ee) */ static int ks8851_eeprom_claim(struct ks8851_net *ks) { - if (!(ks->rc_ccr & CCR_EEPROM)) - return -ENOENT; - - mutex_lock(&ks->lock); - /* start with clock low, cs high */ ks8851_wrreg16(ks, KS_EEPCR, EEPCR_EESA | EEPCR_EECS); return 0; @@ -1181,7 +805,6 @@ static void ks8851_eeprom_release(struct ks8851_net *ks) unsigned val = ks8851_rdreg16(ks, KS_EEPCR); ks8851_wrreg16(ks, KS_EEPCR, val & ~EEPCR_EESA); - mutex_unlock(&ks->lock); } #define KS_EEPROM_MAGIC (0x00008851) @@ -1191,6 +814,7 @@ static int ks8851_set_eeprom(struct net_device *dev, { struct ks8851_net *ks = netdev_priv(dev); int offset = ee->offset; + unsigned long flags; int len = ee->len; u16 tmp; @@ -1201,9 +825,13 @@ static int ks8851_set_eeprom(struct net_device *dev, if (ee->magic != KS_EEPROM_MAGIC) return -EINVAL; - if (ks8851_eeprom_claim(ks)) + if (!(ks->rc_ccr & CCR_EEPROM)) return -ENOENT; + ks8851_lock(ks, &flags); + + ks8851_eeprom_claim(ks); + eeprom_93cx6_wren(&ks->eeprom, true); /* ethtool currently only supports writing bytes, which means @@ -1223,6 +851,7 @@ static int ks8851_set_eeprom(struct net_device *dev, eeprom_93cx6_wren(&ks->eeprom, false); ks8851_eeprom_release(ks); + ks8851_unlock(ks, &flags); return 0; } @@ -1232,19 +861,25 @@ static int ks8851_get_eeprom(struct net_device *dev, { struct ks8851_net *ks = netdev_priv(dev); int offset = ee->offset; + unsigned long flags; int len = ee->len; /* must be 2 byte aligned */ if (len & 1 || offset & 1) return -EINVAL; - if (ks8851_eeprom_claim(ks)) + if (!(ks->rc_ccr & CCR_EEPROM)) return -ENOENT; + ks8851_lock(ks, &flags); + + ks8851_eeprom_claim(ks); + ee->magic = KS_EEPROM_MAGIC; eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2); ks8851_eeprom_release(ks); + ks8851_unlock(ks, &flags); return 0; } @@ -1318,6 +953,7 @@ static int ks8851_phy_reg(int reg) static int ks8851_phy_read(struct net_device *dev, int phy_addr, int reg) { struct ks8851_net *ks = netdev_priv(dev); + unsigned long flags; int ksreg; int result; @@ -1325,9 +961,9 @@ static int ks8851_phy_read(struct net_device *dev, int phy_addr, int reg) if (!ksreg) return 0x0; /* no error return allowed, so use zero */ - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); result = ks8851_rdreg16(ks, ksreg); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); return result; } @@ -1336,13 +972,14 @@ static void ks8851_phy_write(struct net_device *dev, int phy, int reg, int value) { struct ks8851_net *ks = netdev_priv(dev); + unsigned long flags; int ksreg; ksreg = ks8851_phy_reg(reg); if (ksreg) { - mutex_lock(&ks->lock); + ks8851_lock(ks, &flags); ks8851_wrreg16(ks, ksreg, value); - mutex_unlock(&ks->lock); + ks8851_unlock(ks, &flags); } } @@ -1382,7 +1019,7 @@ static int ks8851_read_selftest(struct ks8851_net *ks) #ifdef CONFIG_PM_SLEEP -static int ks8851_suspend(struct device *dev) +int ks8851_suspend(struct device *dev) { struct ks8851_net *ks = dev_get_drvdata(dev); struct net_device *netdev = ks->netdev; @@ -1395,7 +1032,7 @@ static int ks8851_suspend(struct device *dev) return 0; } -static int ks8851_resume(struct device *dev) +int ks8851_resume(struct device *dev) { struct ks8851_net *ks = dev_get_drvdata(dev); struct net_device *netdev = ks->netdev; @@ -1409,46 +1046,32 @@ static int ks8851_resume(struct device *dev) } #endif -static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume); - -static int ks8851_probe(struct spi_device *spi) +int ks8851_probe_common(struct net_device *netdev, struct device *dev, + int msg_en) { - struct net_device *ndev; - struct ks8851_net *ks; - int ret; + struct ks8851_net *ks = netdev_priv(netdev); unsigned cider; int gpio; + int ret; - ndev = alloc_etherdev(sizeof(struct ks8851_net)); - if (!ndev) - return -ENOMEM; - - spi->bits_per_word = 8; - - ks = netdev_priv(ndev); - - ks->netdev = ndev; - ks->spidev = spi; + ks->netdev = netdev; ks->tx_space = 6144; - gpio = of_get_named_gpio_flags(spi->dev.of_node, "reset-gpios", - 0, NULL); - if (gpio == -EPROBE_DEFER) { - ret = gpio; - goto err_gpio; - } + gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL); + if (gpio == -EPROBE_DEFER) + return gpio; ks->gpio = gpio; if (gpio_is_valid(gpio)) { - ret = devm_gpio_request_one(&spi->dev, gpio, + ret = devm_gpio_request_one(dev, gpio, GPIOF_OUT_INIT_LOW, "ks8851_rst_n"); if (ret) { - dev_err(&spi->dev, "reset gpio request failed\n"); - goto err_gpio; + dev_err(dev, "reset gpio request failed\n"); + return ret; } } - ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io"); + ks->vdd_io = devm_regulator_get(dev, "vdd-io"); if (IS_ERR(ks->vdd_io)) { ret = PTR_ERR(ks->vdd_io); goto err_reg_io; @@ -1456,12 +1079,11 @@ static int ks8851_probe(struct spi_device *spi) ret = regulator_enable(ks->vdd_io); if (ret) { - dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n", - ret); + dev_err(dev, "regulator vdd_io enable fail: %d\n", ret); goto err_reg_io; } - ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd"); + ks->vdd_reg = devm_regulator_get(dev, "vdd"); if (IS_ERR(ks->vdd_reg)) { ret = PTR_ERR(ks->vdd_reg); goto err_reg; @@ -1469,8 +1091,7 @@ static int ks8851_probe(struct spi_device *spi) ret = regulator_enable(ks->vdd_reg); if (ret) { - dev_err(&spi->dev, "regulator vdd enable fail: %d\n", - ret); + dev_err(dev, "regulator vdd enable fail: %d\n", ret); goto err_reg; } @@ -1479,54 +1100,41 @@ static int ks8851_probe(struct spi_device *spi) gpio_set_value(gpio, 1); } - mutex_init(&ks->lock); spin_lock_init(&ks->statelock); - INIT_WORK(&ks->tx_work, ks8851_tx_work); INIT_WORK(&ks->rxctrl_work, ks8851_rxctrl_work); - /* initialise pre-made spi transfer messages */ - - spi_message_init(&ks->spi_msg1); - spi_message_add_tail(&ks->spi_xfer1, &ks->spi_msg1); - - spi_message_init(&ks->spi_msg2); - spi_message_add_tail(&ks->spi_xfer2[0], &ks->spi_msg2); - spi_message_add_tail(&ks->spi_xfer2[1], &ks->spi_msg2); - /* setup EEPROM state */ - ks->eeprom.data = ks; ks->eeprom.width = PCI_EEPROM_WIDTH_93C46; ks->eeprom.register_read = ks8851_eeprom_regread; ks->eeprom.register_write = ks8851_eeprom_regwrite; /* setup mii state */ - ks->mii.dev = ndev; + ks->mii.dev = netdev; ks->mii.phy_id = 1, ks->mii.phy_id_mask = 1; ks->mii.reg_num_mask = 0xf; ks->mii.mdio_read = ks8851_phy_read; ks->mii.mdio_write = ks8851_phy_write; - dev_info(&spi->dev, "message enable is %d\n", msg_enable); + dev_info(dev, "message enable is %d\n", msg_en); /* set the default message enable */ - ks->msg_enable = netif_msg_init(msg_enable, (NETIF_MSG_DRV | - NETIF_MSG_PROBE | - NETIF_MSG_LINK)); + ks->msg_enable = netif_msg_init(msg_en, NETIF_MSG_DRV | + NETIF_MSG_PROBE | + NETIF_MSG_LINK); skb_queue_head_init(&ks->txq); - ndev->ethtool_ops = &ks8851_ethtool_ops; - SET_NETDEV_DEV(ndev, &spi->dev); + netdev->ethtool_ops = &ks8851_ethtool_ops; + SET_NETDEV_DEV(netdev, dev); - spi_set_drvdata(spi, ks); + dev_set_drvdata(dev, ks); netif_carrier_off(ks->netdev); - ndev->if_port = IF_PORT_100BASET; - ndev->netdev_ops = &ks8851_netdev_ops; - ndev->irq = spi->irq; + netdev->if_port = IF_PORT_100BASET; + netdev->netdev_ops = &ks8851_netdev_ops; /* issue a global soft reset to reset the device. */ ks8851_soft_reset(ks, GRR_GSR); @@ -1534,7 +1142,7 @@ static int ks8851_probe(struct spi_device *spi) /* simple check for a valid chip being connected to the bus */ cider = ks8851_rdreg16(ks, KS_CIDER); if ((cider & ~CIDER_REV_MASK) != CIDER_ID) { - dev_err(&spi->dev, "failed to read device ID\n"); + dev_err(dev, "failed to read device ID\n"); ret = -ENODEV; goto err_id; } @@ -1543,16 +1151,16 @@ static int ks8851_probe(struct spi_device *spi) ks->rc_ccr = ks8851_rdreg16(ks, KS_CCR); ks8851_read_selftest(ks); - ks8851_init_mac(ks); + ks8851_init_mac(ks, dev->of_node); - ret = register_netdev(ndev); + ret = register_netdev(netdev); if (ret) { - dev_err(&spi->dev, "failed to register network device\n"); + dev_err(dev, "failed to register network device\n"); goto err_netdev; } - netdev_info(ndev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n", - CIDER_REV_GET(cider), ndev->dev_addr, ndev->irq, + netdev_info(netdev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n", + CIDER_REV_GET(cider), netdev->dev_addr, netdev->irq, ks->rc_ccr & CCR_EEPROM ? "has" : "no"); return 0; @@ -1565,49 +1173,21 @@ err_id: err_reg: regulator_disable(ks->vdd_io); err_reg_io: -err_gpio: - free_netdev(ndev); return ret; } -static int ks8851_remove(struct spi_device *spi) +int ks8851_remove_common(struct device *dev) { - struct ks8851_net *priv = spi_get_drvdata(spi); + struct ks8851_net *priv = dev_get_drvdata(dev); if (netif_msg_drv(priv)) - dev_info(&spi->dev, "remove\n"); + dev_info(dev, "remove\n"); unregister_netdev(priv->netdev); if (gpio_is_valid(priv->gpio)) gpio_set_value(priv->gpio, 0); regulator_disable(priv->vdd_reg); regulator_disable(priv->vdd_io); - free_netdev(priv->netdev); return 0; } - -static const struct of_device_id ks8851_match_table[] = { - { .compatible = "micrel,ks8851" }, - { } -}; -MODULE_DEVICE_TABLE(of, ks8851_match_table); - -static struct spi_driver ks8851_driver = { - .driver = { - .name = "ks8851", - .of_match_table = ks8851_match_table, - .pm = &ks8851_pm_ops, - }, - .probe = ks8851_probe, - .remove = ks8851_remove, -}; -module_spi_driver(ks8851_driver); - -MODULE_DESCRIPTION("KS8851 Network driver"); -MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>"); -MODULE_LICENSE("GPL"); - -module_param_named(message, msg_enable, int, 0); -MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)"); -MODULE_ALIAS("spi:ks8851"); diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c deleted file mode 100644 index 45cc840d8e2e..000000000000 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ /dev/null @@ -1,1393 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/** - * drivers/net/ethernet/micrel/ks8851_mll.c - * Copyright (c) 2009 Micrel Inc. - */ - -/* Supports: - * KS8851 16bit MLL chip from Micrel Inc. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/cache.h> -#include <linux/crc32.h> -#include <linux/crc32poly.h> -#include <linux/mii.h> -#include <linux/platform_device.h> -#include <linux/delay.h> -#include <linux/slab.h> -#include <linux/ks8851_mll.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/of_net.h> - -#include "ks8851.h" - -#define DRV_NAME "ks8851_mll" - -static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; -#define MAX_RECV_FRAMES 255 -#define MAX_BUF_SIZE 2048 -#define TX_BUF_SIZE 2000 -#define RX_BUF_SIZE 2000 - -#define RXCR1_FILTER_MASK (RXCR1_RXINVF | RXCR1_RXAE | \ - RXCR1_RXMAFMA | RXCR1_RXPAFMA) -#define RXQCR_CMD_CNTL (RXQCR_RXFCTE|RXQCR_ADRFE) - -#define ENUM_BUS_NONE 0 -#define ENUM_BUS_8BIT 1 -#define ENUM_BUS_16BIT 2 -#define ENUM_BUS_32BIT 3 - -#define MAX_MCAST_LST 32 -#define HW_MCAST_SIZE 8 - -/** - * union ks_tx_hdr - tx header data - * @txb: The header as bytes - * @txw: The header as 16bit, little-endian words - * - * A dual representation of the tx header data to allow - * access to individual bytes, and to allow 16bit accesses - * with 16bit alignment. - */ -union ks_tx_hdr { - u8 txb[4]; - __le16 txw[2]; -}; - -/** - * struct ks_net - KS8851 driver private data - * @net_device : The network device we're bound to - * @hw_addr : start address of data register. - * @hw_addr_cmd : start address of command register. - * @txh : temporaly buffer to save status/length. - * @lock : Lock to ensure that the device is not accessed when busy. - * @pdev : Pointer to platform device. - * @mii : The MII state information for the mii calls. - * @frame_head_info : frame header information for multi-pkt rx. - * @statelock : Lock on this structure for tx list. - * @msg_enable : The message flags controlling driver output (see ethtool). - * @frame_cnt : number of frames received. - * @bus_width : i/o bus width. - * @rc_rxqcr : Cached copy of KS_RXQCR. - * @rc_txcr : Cached copy of KS_TXCR. - * @rc_ier : Cached copy of KS_IER. - * @sharedbus : Multipex(addr and data bus) mode indicator. - * @cmd_reg_cache : command register cached. - * @cmd_reg_cache_int : command register cached. Used in the irq handler. - * @promiscuous : promiscuous mode indicator. - * @all_mcast : mutlicast indicator. - * @mcast_lst_size : size of multicast list. - * @mcast_lst : multicast list. - * @mcast_bits : multicast enabed. - * @mac_addr : MAC address assigned to this device. - * @fid : frame id. - * @extra_byte : number of extra byte prepended rx pkt. - * @enabled : indicator this device works. - * - * The @lock ensures that the chip is protected when certain operations are - * in progress. When the read or write packet transfer is in progress, most - * of the chip registers are not accessible until the transfer is finished and - * the DMA has been de-asserted. - * - * The @statelock is used to protect information in the structure which may - * need to be accessed via several sources, such as the network driver layer - * or one of the work queues. - * - */ - -/* Receive multiplex framer header info */ -struct type_frame_head { - u16 sts; /* Frame status */ - u16 len; /* Byte count */ -}; - -struct ks_net { - struct net_device *netdev; - void __iomem *hw_addr; - void __iomem *hw_addr_cmd; - union ks_tx_hdr txh ____cacheline_aligned; - struct mutex lock; /* spinlock to be interrupt safe */ - struct platform_device *pdev; - struct mii_if_info mii; - struct type_frame_head *frame_head_info; - spinlock_t statelock; - u32 msg_enable; - u32 frame_cnt; - int bus_width; - - u16 rc_rxqcr; - u16 rc_txcr; - u16 rc_ier; - u16 sharedbus; - u16 cmd_reg_cache; - u16 cmd_reg_cache_int; - u16 promiscuous; - u16 all_mcast; - u16 mcast_lst_size; - u8 mcast_lst[MAX_MCAST_LST][ETH_ALEN]; - u8 mcast_bits[HW_MCAST_SIZE]; - u8 mac_addr[6]; - u8 fid; - u8 extra_byte; - u8 enabled; -}; - -static int msg_enable; - -#define BE3 0x8000 /* Byte Enable 3 */ -#define BE2 0x4000 /* Byte Enable 2 */ -#define BE1 0x2000 /* Byte Enable 1 */ -#define BE0 0x1000 /* Byte Enable 0 */ - -/* register read/write calls. - * - * All these calls issue transactions to access the chip's registers. They - * all require that the necessary lock is held to prevent accesses when the - * chip is busy transferring packet data (RX/TX FIFO accesses). - */ - -/** - * ks_check_endian - Check whether endianness of the bus is correct - * @ks : The chip information - * - * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit - * bus. To maintain optimum performance, the bus endianness should be set - * such that it matches the endianness of the CPU. - */ - -static int ks_check_endian(struct ks_net *ks) -{ - u16 cider; - - /* - * Read CIDER register first, however read it the "wrong" way around. - * If the endian strap on the KS8851-16MLL in incorrect and the chip - * is operating in different endianness than the CPU, then the meaning - * of BE[3:0] byte-enable bits is also swapped such that: - * BE[3,2,1,0] becomes BE[1,0,3,2] - * - * Luckily for us, the byte-enable bits are the top four MSbits of - * the address register and the CIDER register is at offset 0xc0. - * Hence, by reading address 0xc0c0, which is not impacted by endian - * swapping, we assert either BE[3:2] or BE[1:0] while reading the - * CIDER register. - * - * If the bus configuration is correct, reading 0xc0c0 asserts - * BE[3:2] and this read returns 0x0000, because to read register - * with bottom two LSbits of address set to 0, BE[1:0] must be - * asserted. - * - * If the bus configuration is NOT correct, reading 0xc0c0 asserts - * BE[1:0] and this read returns non-zero 0x8872 value. - */ - iowrite16(BE3 | BE2 | KS_CIDER, ks->hw_addr_cmd); - cider = ioread16(ks->hw_addr); - if (!cider) - return 0; - - netdev_err(ks->netdev, "incorrect EESK endian strap setting\n"); - - return -EINVAL; -} - -/** - * ks_rdreg16 - read 16 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 16bit register from the chip, returning the result - */ - -static u16 ks_rdreg16(struct ks_net *ks, int offset) -{ - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - return ioread16(ks->hw_addr); -} - -/** - * ks_wrreg16 - write 16bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ - -static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) -{ - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value, ks->hw_addr); -} - -/** - * ks_inblk - read a block of data from QMU. This is called after sudo DMA mode enabled. - * @ks: The chip state - * @wptr: buffer address to save data - * @len: length in byte to read - * - */ -static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) -{ - len >>= 1; - while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); -} - -/** - * ks_outblk - write data to QMU. This is called after sudo DMA mode enabled. - * @ks: The chip information - * @wptr: buffer address - * @len: length in byte to write - * - */ -static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) -{ - len >>= 1; - while (len--) - iowrite16(*wptr++, ks->hw_addr); -} - -static void ks_disable_int(struct ks_net *ks) -{ - ks_wrreg16(ks, KS_IER, 0x0000); -} /* ks_disable_int */ - -static void ks_enable_int(struct ks_net *ks) -{ - ks_wrreg16(ks, KS_IER, ks->rc_ier); -} /* ks_enable_int */ - -/** - * ks_tx_fifo_space - return the available hardware buffer size. - * @ks: The chip information - * - */ -static inline u16 ks_tx_fifo_space(struct ks_net *ks) -{ - return ks_rdreg16(ks, KS_TXMIR) & 0x1fff; -} - -/** - * ks_save_cmd_reg - save the command register from the cache. - * @ks: The chip information - * - */ -static inline void ks_save_cmd_reg(struct ks_net *ks) -{ - /*ks8851 MLL has a bug to read back the command register. - * So rely on software to save the content of command register. - */ - ks->cmd_reg_cache_int = ks->cmd_reg_cache; -} - -/** - * ks_restore_cmd_reg - restore the command register from the cache and - * write to hardware register. - * @ks: The chip information - * - */ -static inline void ks_restore_cmd_reg(struct ks_net *ks) -{ - ks->cmd_reg_cache = ks->cmd_reg_cache_int; - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); -} - -/** - * ks_set_powermode - set power mode of the device - * @ks: The chip information - * @pwrmode: The power mode value to write to KS_PMECR. - * - * Change the power mode of the chip. - */ -static void ks_set_powermode(struct ks_net *ks, unsigned pwrmode) -{ - unsigned pmecr; - - netif_dbg(ks, hw, ks->netdev, "setting power mode %d\n", pwrmode); - - ks_rdreg16(ks, KS_GRR); - pmecr = ks_rdreg16(ks, KS_PMECR); - pmecr &= ~PMECR_PM_MASK; - pmecr |= pwrmode; - - ks_wrreg16(ks, KS_PMECR, pmecr); -} - -/** - * ks_read_config - read chip configuration of bus width. - * @ks: The chip information - * - */ -static void ks_read_config(struct ks_net *ks) -{ - u16 reg_data = 0; - - /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg16(ks, KS_CCR); - - /* addr/data bus are multiplexed */ - ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; - - /* There are garbage data when reading data from QMU, - depending on bus-width. - */ - - if (reg_data & CCR_8BIT) { - ks->bus_width = ENUM_BUS_8BIT; - ks->extra_byte = 1; - } else if (reg_data & CCR_16BIT) { - ks->bus_width = ENUM_BUS_16BIT; - ks->extra_byte = 2; - } else { - ks->bus_width = ENUM_BUS_32BIT; - ks->extra_byte = 4; - } -} - -/** - * ks_soft_reset - issue one of the soft reset to the device - * @ks: The device state. - * @op: The bit(s) to set in the GRR - * - * Issue the relevant soft-reset command to the device's GRR register - * specified by @op. - * - * Note, the delays are in there as a caution to ensure that the reset - * has time to take effect and then complete. Since the datasheet does - * not currently specify the exact sequence, we have chosen something - * that seems to work with our device. - */ -static void ks_soft_reset(struct ks_net *ks, unsigned op) -{ - /* Disable interrupt first */ - ks_wrreg16(ks, KS_IER, 0x0000); - ks_wrreg16(ks, KS_GRR, op); - mdelay(10); /* wait a short time to effect reset */ - ks_wrreg16(ks, KS_GRR, 0); - mdelay(1); /* wait for condition to clear */ -} - - -static void ks_enable_qmu(struct ks_net *ks) -{ - u16 w; - - w = ks_rdreg16(ks, KS_TXCR); - /* Enables QMU Transmit (TXCR). */ - ks_wrreg16(ks, KS_TXCR, w | TXCR_TXE); - - /* - * RX Frame Count Threshold Enable and Auto-Dequeue RXQ Frame - * Enable - */ - - w = ks_rdreg16(ks, KS_RXQCR); - ks_wrreg16(ks, KS_RXQCR, w | RXQCR_RXFCTE); - - /* Enables QMU Receive (RXCR1). */ - w = ks_rdreg16(ks, KS_RXCR1); - ks_wrreg16(ks, KS_RXCR1, w | RXCR1_RXE); - ks->enabled = true; -} /* ks_enable_qmu */ - -static void ks_disable_qmu(struct ks_net *ks) -{ - u16 w; - - w = ks_rdreg16(ks, KS_TXCR); - - /* Disables QMU Transmit (TXCR). */ - w &= ~TXCR_TXE; - ks_wrreg16(ks, KS_TXCR, w); - - /* Disables QMU Receive (RXCR1). */ - w = ks_rdreg16(ks, KS_RXCR1); - w &= ~RXCR1_RXE ; - ks_wrreg16(ks, KS_RXCR1, w); - - ks->enabled = false; - -} /* ks_disable_qmu */ - -/** - * ks_read_qmu - read 1 pkt data from the QMU. - * @ks: The chip information - * @buf: buffer address to save 1 pkt - * @len: Pkt length - * Here is the sequence to read 1 pkt: - * 1. set sudo DMA mode - * 2. read prepend data - * 3. read pkt data - * 4. reset sudo DMA Mode - */ -static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) -{ - u32 r = ks->extra_byte & 0x1 ; - u32 w = ks->extra_byte - r; - - /* 1. set sudo DMA mode */ - ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); - - /* 2. read prepend data */ - /** - * read 4 + extra bytes and discard them. - * extra bytes for dummy, 2 for status, 2 for len - */ - - /* use likely(r) for 8 bit access for performance */ - if (unlikely(r)) - ioread8(ks->hw_addr); - ks_inblk(ks, buf, w + 2 + 2); - - /* 3. read pkt data */ - ks_inblk(ks, buf, ALIGN(len, 4)); - - /* 4. reset sudo DMA Mode */ - ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); -} - -/** - * ks_rcv - read multiple pkts data from the QMU. - * @ks: The chip information - * @netdev: The network device being opened. - * - * Read all of header information before reading pkt content. - * It is not allowed only port of pkts in QMU after issuing - * interrupt ack. - */ -static void ks_rcv(struct ks_net *ks, struct net_device *netdev) -{ - u32 i; - struct type_frame_head *frame_hdr = ks->frame_head_info; - struct sk_buff *skb; - - ks->frame_cnt = ks_rdreg16(ks, KS_RXFCTR) >> 8; - - /* read all header information */ - for (i = 0; i < ks->frame_cnt; i++) { - /* Checking Received packet status */ - frame_hdr->sts = ks_rdreg16(ks, KS_RXFHSR); - /* Get packet len from hardware */ - frame_hdr->len = ks_rdreg16(ks, KS_RXFHBCR); - frame_hdr++; - } - - frame_hdr = ks->frame_head_info; - while (ks->frame_cnt--) { - if (unlikely(!(frame_hdr->sts & RXFSHR_RXFV) || - frame_hdr->len >= RX_BUF_SIZE || - frame_hdr->len <= 0)) { - - /* discard an invalid packet */ - ks_wrreg16(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_RRXEF)); - netdev->stats.rx_dropped++; - if (!(frame_hdr->sts & RXFSHR_RXFV)) - netdev->stats.rx_frame_errors++; - else - netdev->stats.rx_length_errors++; - frame_hdr++; - continue; - } - - skb = netdev_alloc_skb(netdev, frame_hdr->len + 16); - if (likely(skb)) { - skb_reserve(skb, 2); - /* read data block including CRC 4 bytes */ - ks_read_qmu(ks, (u16 *)skb->data, frame_hdr->len); - skb_put(skb, frame_hdr->len - 4); - skb->protocol = eth_type_trans(skb, netdev); - netif_rx(skb); - /* exclude CRC size */ - netdev->stats.rx_bytes += frame_hdr->len - 4; - netdev->stats.rx_packets++; - } else { - ks_wrreg16(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_RRXEF)); - netdev->stats.rx_dropped++; - } - frame_hdr++; - } -} - -/** - * ks_update_link_status - link status update. - * @netdev: The network device being opened. - * @ks: The chip information - * - */ - -static void ks_update_link_status(struct net_device *netdev, struct ks_net *ks) -{ - /* check the status of the link */ - u32 link_up_status; - if (ks_rdreg16(ks, KS_P1SR) & P1SR_LINK_GOOD) { - netif_carrier_on(netdev); - link_up_status = true; - } else { - netif_carrier_off(netdev); - link_up_status = false; - } - netif_dbg(ks, link, ks->netdev, - "%s: %s\n", __func__, link_up_status ? "UP" : "DOWN"); -} - -/** - * ks_irq - device interrupt handler - * @irq: Interrupt number passed from the IRQ handler. - * @pw: The private word passed to register_irq(), our struct ks_net. - * - * This is the handler invoked to find out what happened - * - * Read the interrupt status, work out what needs to be done and then clear - * any of the interrupts that are not needed. - */ - -static irqreturn_t ks_irq(int irq, void *pw) -{ - struct net_device *netdev = pw; - struct ks_net *ks = netdev_priv(netdev); - unsigned long flags; - u16 status; - - spin_lock_irqsave(&ks->statelock, flags); - /*this should be the first in IRQ handler */ - ks_save_cmd_reg(ks); - - status = ks_rdreg16(ks, KS_ISR); - if (unlikely(!status)) { - ks_restore_cmd_reg(ks); - spin_unlock_irqrestore(&ks->statelock, flags); - return IRQ_NONE; - } - - ks_wrreg16(ks, KS_ISR, status); - - if (likely(status & IRQ_RXI)) - ks_rcv(ks, netdev); - - if (unlikely(status & IRQ_LCI)) - ks_update_link_status(netdev, ks); - - if (unlikely(status & IRQ_TXI)) - netif_wake_queue(netdev); - - if (unlikely(status & IRQ_LDI)) { - - u16 pmecr = ks_rdreg16(ks, KS_PMECR); - pmecr &= ~PMECR_WKEVT_MASK; - ks_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK); - } - - if (unlikely(status & IRQ_RXOI)) - ks->netdev->stats.rx_over_errors++; - /* this should be the last in IRQ handler*/ - ks_restore_cmd_reg(ks); - spin_unlock_irqrestore(&ks->statelock, flags); - return IRQ_HANDLED; -} - - -/** - * ks_net_open - open network device - * @netdev: The network device being opened. - * - * Called when the network device is marked active, such as a user executing - * 'ifconfig up' on the device. - */ -static int ks_net_open(struct net_device *netdev) -{ - struct ks_net *ks = netdev_priv(netdev); - int err; - -#define KS_INT_FLAGS IRQF_TRIGGER_LOW - /* lock the card, even if we may not actually do anything - * else at the moment. - */ - - netif_dbg(ks, ifup, ks->netdev, "%s - entry\n", __func__); - - /* reset the HW */ - err = request_irq(netdev->irq, ks_irq, KS_INT_FLAGS, DRV_NAME, netdev); - - if (err) { - pr_err("Failed to request IRQ: %d: %d\n", netdev->irq, err); - return err; - } - - /* wake up powermode to normal mode */ - ks_set_powermode(ks, PMECR_PM_NORMAL); - mdelay(1); /* wait for normal mode to take effect */ - - ks_wrreg16(ks, KS_ISR, 0xffff); - ks_enable_int(ks); - ks_enable_qmu(ks); - netif_start_queue(ks->netdev); - - netif_dbg(ks, ifup, ks->netdev, "network device up\n"); - - return 0; -} - -/** - * ks_net_stop - close network device - * @netdev: The device being closed. - * - * Called to close down a network device which has been active. Cancell any - * work, shutdown the RX and TX process and then place the chip into a low - * power state whilst it is not being used. - */ -static int ks_net_stop(struct net_device *netdev) -{ - struct ks_net *ks = netdev_priv(netdev); - - netif_info(ks, ifdown, netdev, "shutting down\n"); - - netif_stop_queue(netdev); - - mutex_lock(&ks->lock); - - /* turn off the IRQs and ack any outstanding */ - ks_wrreg16(ks, KS_IER, 0x0000); - ks_wrreg16(ks, KS_ISR, 0xffff); - - /* shutdown RX/TX QMU */ - ks_disable_qmu(ks); - ks_disable_int(ks); - - /* set powermode to soft power down to save power */ - ks_set_powermode(ks, PMECR_PM_SOFTDOWN); - free_irq(netdev->irq, netdev); - mutex_unlock(&ks->lock); - return 0; -} - - -/** - * ks_write_qmu - write 1 pkt data to the QMU. - * @ks: The chip information - * @pdata: buffer address to save 1 pkt - * @len: Pkt length in byte - * Here is the sequence to write 1 pkt: - * 1. set sudo DMA mode - * 2. write status/length - * 3. write pkt data - * 4. reset sudo DMA Mode - * 5. reset sudo DMA mode - * 6. Wait until pkt is out - */ -static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) -{ - /* start header at txb[0] to align txw entries */ - ks->txh.txw[0] = 0; - ks->txh.txw[1] = cpu_to_le16(len); - - /* 1. set sudo-DMA mode */ - ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); - /* 2. write status/lenth info */ - ks_outblk(ks, ks->txh.txw, 4); - /* 3. write pkt data */ - ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); - /* 4. reset sudo-DMA mode */ - ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); - /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ - ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); - /* 6. wait until TXQCR_METFE is auto-cleared */ - while (ks_rdreg16(ks, KS_TXQCR) & TXQCR_METFE) - ; -} - -/** - * ks_start_xmit - transmit packet - * @skb : The buffer to transmit - * @netdev : The device used to transmit the packet. - * - * Called by the network layer to transmit the @skb. - * spin_lock_irqsave is required because tx and rx should be mutual exclusive. - * So while tx is in-progress, prevent IRQ interrupt from happenning. - */ -static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) -{ - netdev_tx_t retv = NETDEV_TX_OK; - struct ks_net *ks = netdev_priv(netdev); - unsigned long flags; - - spin_lock_irqsave(&ks->statelock, flags); - - /* Extra space are required: - * 4 byte for alignment, 4 for status/length, 4 for CRC - */ - - if (likely(ks_tx_fifo_space(ks) >= skb->len + 12)) { - ks_write_qmu(ks, skb->data, skb->len); - /* add tx statistics */ - netdev->stats.tx_bytes += skb->len; - netdev->stats.tx_packets++; - dev_kfree_skb(skb); - } else - retv = NETDEV_TX_BUSY; - spin_unlock_irqrestore(&ks->statelock, flags); - return retv; -} - -/** - * ks_start_rx - ready to serve pkts - * @ks : The chip information - * - */ -static void ks_start_rx(struct ks_net *ks) -{ - u16 cntl; - - /* Enables QMU Receive (RXCR1). */ - cntl = ks_rdreg16(ks, KS_RXCR1); - cntl |= RXCR1_RXE ; - ks_wrreg16(ks, KS_RXCR1, cntl); -} /* ks_start_rx */ - -/** - * ks_stop_rx - stop to serve pkts - * @ks : The chip information - * - */ -static void ks_stop_rx(struct ks_net *ks) -{ - u16 cntl; - - /* Disables QMU Receive (RXCR1). */ - cntl = ks_rdreg16(ks, KS_RXCR1); - cntl &= ~RXCR1_RXE ; - ks_wrreg16(ks, KS_RXCR1, cntl); - -} /* ks_stop_rx */ - -static unsigned long const ethernet_polynomial = CRC32_POLY_BE; - -static unsigned long ether_gen_crc(int length, u8 *data) -{ - long crc = -1; - while (--length >= 0) { - u8 current_octet = *data++; - int bit; - - for (bit = 0; bit < 8; bit++, current_octet >>= 1) { - crc = (crc << 1) ^ - ((crc < 0) ^ (current_octet & 1) ? - ethernet_polynomial : 0); - } - } - return (unsigned long)crc; -} /* ether_gen_crc */ - -/** -* ks_set_grpaddr - set multicast information -* @ks : The chip information -*/ - -static void ks_set_grpaddr(struct ks_net *ks) -{ - u8 i; - u32 index, position, value; - - memset(ks->mcast_bits, 0, sizeof(u8) * HW_MCAST_SIZE); - - for (i = 0; i < ks->mcast_lst_size; i++) { - position = (ether_gen_crc(6, ks->mcast_lst[i]) >> 26) & 0x3f; - index = position >> 3; - value = 1 << (position & 7); - ks->mcast_bits[index] |= (u8)value; - } - - for (i = 0; i < HW_MCAST_SIZE; i++) { - if (i & 1) { - ks_wrreg16(ks, (u16)((KS_MAHTR0 + i) & ~1), - (ks->mcast_bits[i] << 8) | - ks->mcast_bits[i - 1]); - } - } -} /* ks_set_grpaddr */ - -/** -* ks_clear_mcast - clear multicast information -* -* @ks : The chip information -* This routine removes all mcast addresses set in the hardware. -*/ - -static void ks_clear_mcast(struct ks_net *ks) -{ - u16 i, mcast_size; - for (i = 0; i < HW_MCAST_SIZE; i++) - ks->mcast_bits[i] = 0; - - mcast_size = HW_MCAST_SIZE >> 2; - for (i = 0; i < mcast_size; i++) - ks_wrreg16(ks, KS_MAHTR0 + (2*i), 0); -} - -static void ks_set_promis(struct ks_net *ks, u16 promiscuous_mode) -{ - u16 cntl; - ks->promiscuous = promiscuous_mode; - ks_stop_rx(ks); /* Stop receiving for reconfiguration */ - cntl = ks_rdreg16(ks, KS_RXCR1); - - cntl &= ~RXCR1_FILTER_MASK; - if (promiscuous_mode) - /* Enable Promiscuous mode */ - cntl |= RXCR1_RXAE | RXCR1_RXINVF; - else - /* Disable Promiscuous mode (default normal mode) */ - cntl |= RXCR1_RXPAFMA; - - ks_wrreg16(ks, KS_RXCR1, cntl); - - if (ks->enabled) - ks_start_rx(ks); - -} /* ks_set_promis */ - -static void ks_set_mcast(struct ks_net *ks, u16 mcast) -{ - u16 cntl; - - ks->all_mcast = mcast; - ks_stop_rx(ks); /* Stop receiving for reconfiguration */ - cntl = ks_rdreg16(ks, KS_RXCR1); - cntl &= ~RXCR1_FILTER_MASK; - if (mcast) - /* Enable "Perfect with Multicast address passed mode" */ - cntl |= (RXCR1_RXAE | RXCR1_RXMAFMA | RXCR1_RXPAFMA); - else - /** - * Disable "Perfect with Multicast address passed - * mode" (normal mode). - */ - cntl |= RXCR1_RXPAFMA; - - ks_wrreg16(ks, KS_RXCR1, cntl); - - if (ks->enabled) - ks_start_rx(ks); -} /* ks_set_mcast */ - -static void ks_set_rx_mode(struct net_device *netdev) -{ - struct ks_net *ks = netdev_priv(netdev); - struct netdev_hw_addr *ha; - - /* Turn on/off promiscuous mode. */ - if ((netdev->flags & IFF_PROMISC) == IFF_PROMISC) - ks_set_promis(ks, - (u16)((netdev->flags & IFF_PROMISC) == IFF_PROMISC)); - /* Turn on/off all mcast mode. */ - else if ((netdev->flags & IFF_ALLMULTI) == IFF_ALLMULTI) - ks_set_mcast(ks, - (u16)((netdev->flags & IFF_ALLMULTI) == IFF_ALLMULTI)); - else - ks_set_promis(ks, false); - - if ((netdev->flags & IFF_MULTICAST) && netdev_mc_count(netdev)) { - if (netdev_mc_count(netdev) <= MAX_MCAST_LST) { - int i = 0; - - netdev_for_each_mc_addr(ha, netdev) { - if (i >= MAX_MCAST_LST) - break; - memcpy(ks->mcast_lst[i++], ha->addr, ETH_ALEN); - } - ks->mcast_lst_size = (u8)i; - ks_set_grpaddr(ks); - } else { - /** - * List too big to support so - * turn on all mcast mode. - */ - ks->mcast_lst_size = MAX_MCAST_LST; - ks_set_mcast(ks, true); - } - } else { - ks->mcast_lst_size = 0; - ks_clear_mcast(ks); - } -} /* ks_set_rx_mode */ - -static void ks_set_mac(struct ks_net *ks, u8 *data) -{ - u16 *pw = (u16 *)data; - u16 w, u; - - ks_stop_rx(ks); /* Stop receiving for reconfiguration */ - - u = *pw++; - w = ((u & 0xFF) << 8) | ((u >> 8) & 0xFF); - ks_wrreg16(ks, KS_MARH, w); - - u = *pw++; - w = ((u & 0xFF) << 8) | ((u >> 8) & 0xFF); - ks_wrreg16(ks, KS_MARM, w); - - u = *pw; - w = ((u & 0xFF) << 8) | ((u >> 8) & 0xFF); - ks_wrreg16(ks, KS_MARL, w); - - memcpy(ks->mac_addr, data, ETH_ALEN); - - if (ks->enabled) - ks_start_rx(ks); -} - -static int ks_set_mac_address(struct net_device *netdev, void *paddr) -{ - struct ks_net *ks = netdev_priv(netdev); - struct sockaddr *addr = paddr; - u8 *da; - - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - - da = (u8 *)netdev->dev_addr; - - ks_set_mac(ks, da); - return 0; -} - -static int ks_net_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) -{ - struct ks_net *ks = netdev_priv(netdev); - - if (!netif_running(netdev)) - return -EINVAL; - - return generic_mii_ioctl(&ks->mii, if_mii(req), cmd, NULL); -} - -static const struct net_device_ops ks_netdev_ops = { - .ndo_open = ks_net_open, - .ndo_stop = ks_net_stop, - .ndo_do_ioctl = ks_net_ioctl, - .ndo_start_xmit = ks_start_xmit, - .ndo_set_mac_address = ks_set_mac_address, - .ndo_set_rx_mode = ks_set_rx_mode, - .ndo_validate_addr = eth_validate_addr, -}; - -/* ethtool support */ - -static void ks_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *di) -{ - strlcpy(di->driver, DRV_NAME, sizeof(di->driver)); - strlcpy(di->version, "1.00", sizeof(di->version)); - strlcpy(di->bus_info, dev_name(netdev->dev.parent), - sizeof(di->bus_info)); -} - -static u32 ks_get_msglevel(struct net_device *netdev) -{ - struct ks_net *ks = netdev_priv(netdev); - return ks->msg_enable; -} - -static void ks_set_msglevel(struct net_device *netdev, u32 to) -{ - struct ks_net *ks = netdev_priv(netdev); - ks->msg_enable = to; -} - -static int ks_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct ks_net *ks = netdev_priv(netdev); - - mii_ethtool_get_link_ksettings(&ks->mii, cmd); - - return 0; -} - -static int ks_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) -{ - struct ks_net *ks = netdev_priv(netdev); - return mii_ethtool_set_link_ksettings(&ks->mii, cmd); -} - -static u32 ks_get_link(struct net_device *netdev) -{ - struct ks_net *ks = netdev_priv(netdev); - return mii_link_ok(&ks->mii); -} - -static int ks_nway_reset(struct net_device *netdev) -{ - struct ks_net *ks = netdev_priv(netdev); - return mii_nway_restart(&ks->mii); -} - -static const struct ethtool_ops ks_ethtool_ops = { - .get_drvinfo = ks_get_drvinfo, - .get_msglevel = ks_get_msglevel, - .set_msglevel = ks_set_msglevel, - .get_link = ks_get_link, - .nway_reset = ks_nway_reset, - .get_link_ksettings = ks_get_link_ksettings, - .set_link_ksettings = ks_set_link_ksettings, -}; - -/* MII interface controls */ - -/** - * ks_phy_reg - convert MII register into a KS8851 register - * @reg: MII register number. - * - * Return the KS8851 register number for the corresponding MII PHY register - * if possible. Return zero if the MII register has no direct mapping to the - * KS8851 register set. - */ -static int ks_phy_reg(int reg) -{ - switch (reg) { - case MII_BMCR: - return KS_P1MBCR; - case MII_BMSR: - return KS_P1MBSR; - case MII_PHYSID1: - return KS_PHY1ILR; - case MII_PHYSID2: - return KS_PHY1IHR; - case MII_ADVERTISE: - return KS_P1ANAR; - case MII_LPA: - return KS_P1ANLPR; - } - - return 0x0; -} - -/** - * ks_phy_read - MII interface PHY register read. - * @netdev: The network device the PHY is on. - * @phy_addr: Address of PHY (ignored as we only have one) - * @reg: The register to read. - * - * This call reads data from the PHY register specified in @reg. Since the - * device does not support all the MII registers, the non-existent values - * are always returned as zero. - * - * We return zero for unsupported registers as the MII code does not check - * the value returned for any error status, and simply returns it to the - * caller. The mii-tool that the driver was tested with takes any -ve error - * as real PHY capabilities, thus displaying incorrect data to the user. - */ -static int ks_phy_read(struct net_device *netdev, int phy_addr, int reg) -{ - struct ks_net *ks = netdev_priv(netdev); - int ksreg; - int result; - - ksreg = ks_phy_reg(reg); - if (!ksreg) - return 0x0; /* no error return allowed, so use zero */ - - mutex_lock(&ks->lock); - result = ks_rdreg16(ks, ksreg); - mutex_unlock(&ks->lock); - - return result; -} - -static void ks_phy_write(struct net_device *netdev, - int phy, int reg, int value) -{ - struct ks_net *ks = netdev_priv(netdev); - int ksreg; - - ksreg = ks_phy_reg(reg); - if (ksreg) { - mutex_lock(&ks->lock); - ks_wrreg16(ks, ksreg, value); - mutex_unlock(&ks->lock); - } -} - -/** - * ks_read_selftest - read the selftest memory info. - * @ks: The device state - * - * Read and check the TX/RX memory selftest information. - */ -static int ks_read_selftest(struct ks_net *ks) -{ - unsigned both_done = MBIR_TXMBF | MBIR_RXMBF; - int ret = 0; - unsigned rd; - - rd = ks_rdreg16(ks, KS_MBIR); - - if ((rd & both_done) != both_done) { - netdev_warn(ks->netdev, "Memory selftest not finished\n"); - return 0; - } - - if (rd & MBIR_TXMBFA) { - netdev_err(ks->netdev, "TX memory selftest fails\n"); - ret |= 1; - } - - if (rd & MBIR_RXMBFA) { - netdev_err(ks->netdev, "RX memory selftest fails\n"); - ret |= 2; - } - - netdev_info(ks->netdev, "the selftest passes\n"); - return ret; -} - -static void ks_setup(struct ks_net *ks) -{ - u16 w; - - /** - * Configure QMU Transmit - */ - - /* Setup Transmit Frame Data Pointer Auto-Increment (TXFDPR) */ - ks_wrreg16(ks, KS_TXFDPR, TXFDPR_TXFPAI); - - /* Setup Receive Frame Data Pointer Auto-Increment */ - ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - - /* Setup Receive Frame Threshold - 1 frame (RXFCTFC) */ - ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_RXFCT_MASK); - - /* Setup RxQ Command Control (RXQCR) */ - ks->rc_rxqcr = RXQCR_CMD_CNTL; - ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); - - /** - * set the force mode to half duplex, default is full duplex - * because if the auto-negotiation fails, most switch uses - * half-duplex. - */ - - w = ks_rdreg16(ks, KS_P1MBCR); - w &= ~BMCR_FULLDPLX; - ks_wrreg16(ks, KS_P1MBCR, w); - - w = TXCR_TXFCE | TXCR_TXPE | TXCR_TXCRC | TXCR_TCGIP; - ks_wrreg16(ks, KS_TXCR, w); - - w = RXCR1_RXFCE | RXCR1_RXBE | RXCR1_RXUE | RXCR1_RXME | RXCR1_RXIPFCC; - - if (ks->promiscuous) /* bPromiscuous */ - w |= (RXCR1_RXAE | RXCR1_RXINVF); - else if (ks->all_mcast) /* Multicast address passed mode */ - w |= (RXCR1_RXAE | RXCR1_RXMAFMA | RXCR1_RXPAFMA); - else /* Normal mode */ - w |= RXCR1_RXPAFMA; - - ks_wrreg16(ks, KS_RXCR1, w); -} /*ks_setup */ - - -static void ks_setup_int(struct ks_net *ks) -{ - ks->rc_ier = 0x00; - /* Clear the interrupts status of the hardware. */ - ks_wrreg16(ks, KS_ISR, 0xffff); - - /* Enables the interrupts of the hardware. */ - ks->rc_ier = (IRQ_LCI | IRQ_TXI | IRQ_RXI); -} /* ks_setup_int */ - -static int ks_hw_init(struct ks_net *ks) -{ -#define MHEADER_SIZE (sizeof(struct type_frame_head) * MAX_RECV_FRAMES) - ks->promiscuous = 0; - ks->all_mcast = 0; - ks->mcast_lst_size = 0; - - ks->frame_head_info = devm_kmalloc(&ks->pdev->dev, MHEADER_SIZE, - GFP_KERNEL); - if (!ks->frame_head_info) - return false; - - ks_set_mac(ks, KS_DEFAULT_MAC_ADDRESS); - return true; -} - -#if defined(CONFIG_OF) -static const struct of_device_id ks8851_ml_dt_ids[] = { - { .compatible = "micrel,ks8851-mll" }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, ks8851_ml_dt_ids); -#endif - -static int ks8851_probe(struct platform_device *pdev) -{ - int err; - struct net_device *netdev; - struct ks_net *ks; - u16 id, data; - const char *mac; - - netdev = alloc_etherdev(sizeof(struct ks_net)); - if (!netdev) - return -ENOMEM; - - SET_NETDEV_DEV(netdev, &pdev->dev); - - ks = netdev_priv(netdev); - ks->netdev = netdev; - - ks->hw_addr = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(ks->hw_addr)) { - err = PTR_ERR(ks->hw_addr); - goto err_free; - } - - ks->hw_addr_cmd = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(ks->hw_addr_cmd)) { - err = PTR_ERR(ks->hw_addr_cmd); - goto err_free; - } - - err = ks_check_endian(ks); - if (err) - goto err_free; - - netdev->irq = platform_get_irq(pdev, 0); - - if ((int)netdev->irq < 0) { - err = netdev->irq; - goto err_free; - } - - ks->pdev = pdev; - - mutex_init(&ks->lock); - spin_lock_init(&ks->statelock); - - netdev->netdev_ops = &ks_netdev_ops; - netdev->ethtool_ops = &ks_ethtool_ops; - - /* setup mii state */ - ks->mii.dev = netdev; - ks->mii.phy_id = 1, - ks->mii.phy_id_mask = 1; - ks->mii.reg_num_mask = 0xf; - ks->mii.mdio_read = ks_phy_read; - ks->mii.mdio_write = ks_phy_write; - - netdev_info(netdev, "message enable is %d\n", msg_enable); - /* set the default message enable */ - ks->msg_enable = netif_msg_init(msg_enable, (NETIF_MSG_DRV | - NETIF_MSG_PROBE | - NETIF_MSG_LINK)); - ks_read_config(ks); - - /* simple check for a valid chip being connected to the bus */ - if ((ks_rdreg16(ks, KS_CIDER) & ~CIDER_REV_MASK) != CIDER_ID) { - netdev_err(netdev, "failed to read device ID\n"); - err = -ENODEV; - goto err_free; - } - - if (ks_read_selftest(ks)) { - netdev_err(netdev, "failed to read device ID\n"); - err = -ENODEV; - goto err_free; - } - - err = register_netdev(netdev); - if (err) - goto err_free; - - platform_set_drvdata(pdev, netdev); - - ks_soft_reset(ks, GRR_GSR); - ks_hw_init(ks); - ks_disable_qmu(ks); - ks_setup(ks); - ks_setup_int(ks); - - data = ks_rdreg16(ks, KS_OBCR); - ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA); - - /* overwriting the default MAC address */ - if (pdev->dev.of_node) { - mac = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac)) - ether_addr_copy(ks->mac_addr, mac); - } else { - struct ks8851_mll_platform_data *pdata; - - pdata = dev_get_platdata(&pdev->dev); - if (!pdata) { - netdev_err(netdev, "No platform data\n"); - err = -ENODEV; - goto err_pdata; - } - memcpy(ks->mac_addr, pdata->mac_addr, ETH_ALEN); - } - if (!is_valid_ether_addr(ks->mac_addr)) { - /* Use random MAC address if none passed */ - eth_random_addr(ks->mac_addr); - netdev_info(netdev, "Using random mac address\n"); - } - netdev_info(netdev, "Mac address is: %pM\n", ks->mac_addr); - - memcpy(netdev->dev_addr, ks->mac_addr, ETH_ALEN); - - ks_set_mac(ks, netdev->dev_addr); - - id = ks_rdreg16(ks, KS_CIDER); - - netdev_info(netdev, "Found chip, family: 0x%x, id: 0x%x, rev: 0x%x\n", - (id >> 8) & 0xff, (id >> 4) & 0xf, (id >> 1) & 0x7); - return 0; - -err_pdata: - unregister_netdev(netdev); -err_free: - free_netdev(netdev); - return err; -} - -static int ks8851_remove(struct platform_device *pdev) -{ - struct net_device *netdev = platform_get_drvdata(pdev); - - unregister_netdev(netdev); - free_netdev(netdev); - return 0; - -} - -static struct platform_driver ks8851_platform_driver = { - .driver = { - .name = DRV_NAME, - .of_match_table = of_match_ptr(ks8851_ml_dt_ids), - }, - .probe = ks8851_probe, - .remove = ks8851_remove, -}; - -module_platform_driver(ks8851_platform_driver); - -MODULE_DESCRIPTION("KS8851 MLL Network driver"); -MODULE_AUTHOR("David Choi <david.choi@micrel.com>"); -MODULE_LICENSE("GPL"); -module_param_named(message, msg_enable, int, 0); -MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)"); - diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c new file mode 100644 index 000000000000..3bab0cb2b1a5 --- /dev/null +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* drivers/net/ethernet/micrel/ks8851.c + * + * Copyright 2009 Simtec Electronics + * http://www.simtec.co.uk/ + * Ben Dooks <ben@simtec.co.uk> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DEBUG + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/iopoll.h> +#include <linux/mii.h> + +#include <linux/platform_device.h> +#include <linux/of_net.h> + +#include "ks8851.h" + +static int msg_enable; + +#define BE3 0x8000 /* Byte Enable 3 */ +#define BE2 0x4000 /* Byte Enable 2 */ +#define BE1 0x2000 /* Byte Enable 1 */ +#define BE0 0x1000 /* Byte Enable 0 */ + +/** + * struct ks8851_net_par - KS8851 Parallel driver private data + * @ks8851: KS8851 driver common private data + * @lock: Lock to ensure that the device is not accessed when busy. + * @hw_addr : start address of data register. + * @hw_addr_cmd : start address of command register. + * @cmd_reg_cache : command register cached. + * + * The @lock ensures that the chip is protected when certain operations are + * in progress. When the read or write packet transfer is in progress, most + * of the chip registers are not accessible until the transfer is finished + * and the DMA has been de-asserted. + */ +struct ks8851_net_par { + struct ks8851_net ks8851; + spinlock_t lock; + void __iomem *hw_addr; + void __iomem *hw_addr_cmd; + u16 cmd_reg_cache; +}; + +#define to_ks8851_par(ks) container_of((ks), struct ks8851_net_par, ks8851) + +/** + * ks8851_lock_par - register access lock + * @ks: The chip state + * @flags: Spinlock flags + * + * Claim chip register access lock + */ +static void ks8851_lock_par(struct ks8851_net *ks, unsigned long *flags) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + + spin_lock_irqsave(&ksp->lock, *flags); +} + +/** + * ks8851_unlock_par - register access unlock + * @ks: The chip state + * @flags: Spinlock flags + * + * Release chip register access lock + */ +static void ks8851_unlock_par(struct ks8851_net *ks, unsigned long *flags) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + + spin_unlock_irqrestore(&ksp->lock, *flags); +} + +/** + * ks_check_endian - Check whether endianness of the bus is correct + * @ks : The chip information + * + * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit + * bus. To maintain optimum performance, the bus endianness should be set + * such that it matches the endianness of the CPU. + */ +static int ks_check_endian(struct ks8851_net *ks) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + u16 cider; + + /* + * Read CIDER register first, however read it the "wrong" way around. + * If the endian strap on the KS8851-16MLL in incorrect and the chip + * is operating in different endianness than the CPU, then the meaning + * of BE[3:0] byte-enable bits is also swapped such that: + * BE[3,2,1,0] becomes BE[1,0,3,2] + * + * Luckily for us, the byte-enable bits are the top four MSbits of + * the address register and the CIDER register is at offset 0xc0. + * Hence, by reading address 0xc0c0, which is not impacted by endian + * swapping, we assert either BE[3:2] or BE[1:0] while reading the + * CIDER register. + * + * If the bus configuration is correct, reading 0xc0c0 asserts + * BE[3:2] and this read returns 0x0000, because to read register + * with bottom two LSbits of address set to 0, BE[1:0] must be + * asserted. + * + * If the bus configuration is NOT correct, reading 0xc0c0 asserts + * BE[1:0] and this read returns non-zero 0x8872 value. + */ + iowrite16(BE3 | BE2 | KS_CIDER, ksp->hw_addr_cmd); + cider = ioread16(ksp->hw_addr); + if (!cider) + return 0; + + netdev_err(ks->netdev, "incorrect EESK endian strap setting\n"); + + return -EINVAL; +} + +/** + * ks8851_wrreg16_par - write 16bit register value to chip + * @ks: The chip state + * @reg: The register address + * @val: The value to write + * + * Issue a write to put the value @val into the register specified in @reg. + */ +static void ks8851_wrreg16_par(struct ks8851_net *ks, unsigned int reg, + unsigned int val) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + + ksp->cmd_reg_cache = (u16)reg | ((BE1 | BE0) << (reg & 0x02)); + iowrite16(ksp->cmd_reg_cache, ksp->hw_addr_cmd); + iowrite16(val, ksp->hw_addr); +} + +/** + * ks8851_rdreg16_par - read 16 bit register from chip + * @ks: The chip information + * @reg: The register address + * + * Read a 16bit register from the chip, returning the result + */ +static unsigned int ks8851_rdreg16_par(struct ks8851_net *ks, unsigned int reg) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + + ksp->cmd_reg_cache = (u16)reg | ((BE1 | BE0) << (reg & 0x02)); + iowrite16(ksp->cmd_reg_cache, ksp->hw_addr_cmd); + return ioread16(ksp->hw_addr); +} + +/** + * ks8851_rdfifo_par - read data from the receive fifo + * @ks: The device state. + * @buff: The buffer address + * @len: The length of the data to read + * + * Issue an RXQ FIFO read command and read the @len amount of data from + * the FIFO into the buffer specified by @buff. + */ +static void ks8851_rdfifo_par(struct ks8851_net *ks, u8 *buff, unsigned int len) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + + netif_dbg(ks, rx_status, ks->netdev, + "%s: %d@%p\n", __func__, len, buff); + + ioread16_rep(ksp->hw_addr, (u16 *)buff + 1, len / 2); +} + +/** + * ks8851_wrfifo_par - write packet to TX FIFO + * @ks: The device state. + * @txp: The sk_buff to transmit. + * @irq: IRQ on completion of the packet. + * + * Send the @txp to the chip. This means creating the relevant packet header + * specifying the length of the packet and the other information the chip + * needs, such as IRQ on completion. Send the header and the packet data to + * the device. + */ +static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp, + bool irq) +{ + struct ks8851_net_par *ksp = to_ks8851_par(ks); + unsigned int len = ALIGN(txp->len, 4); + unsigned int fid = 0; + + netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p, irq %d\n", + __func__, txp, txp->len, txp->data, irq); + + fid = ks->fid++; + fid &= TXFR_TXFID_MASK; + + if (irq) + fid |= TXFR_TXIC; /* irq on completion */ + + iowrite16(fid, ksp->hw_addr); + iowrite16(txp->len, ksp->hw_addr); + + iowrite16_rep(ksp->hw_addr, txp->data, len / 2); +} + +/** + * ks8851_rx_skb_par - receive skbuff + * @ks: The device state. + * @skb: The skbuff + */ +static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb) +{ + netif_rx(skb); +} + +static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks) +{ + return ks8851_rdreg16_par(ks, KS_TXQCR); +} + +/** + * ks8851_start_xmit_par - transmit packet + * @skb: The buffer to transmit + * @dev: The device used to transmit the packet. + * + * Called by the network layer to transmit the @skb. Queue the packet for + * the device and schedule the necessary work to transmit the packet when + * it is free. + * + * We do this to firstly avoid sleeping with the network device locked, + * and secondly so we can round up more than one packet to transmit which + * means we can try and avoid generating too many transmit done interrupts. + */ +static netdev_tx_t ks8851_start_xmit_par(struct sk_buff *skb, + struct net_device *dev) +{ + struct ks8851_net *ks = netdev_priv(dev); + netdev_tx_t ret = NETDEV_TX_OK; + unsigned long flags; + unsigned int txqcr; + u16 txmir; + int err; + + netif_dbg(ks, tx_queued, ks->netdev, + "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data); + + ks8851_lock_par(ks, &flags); + + txmir = ks8851_rdreg16_par(ks, KS_TXMIR) & 0x1fff; + + if (likely(txmir >= skb->len + 12)) { + ks8851_wrreg16_par(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); + ks8851_wrfifo_par(ks, skb, false); + ks8851_wrreg16_par(ks, KS_RXQCR, ks->rc_rxqcr); + ks8851_wrreg16_par(ks, KS_TXQCR, TXQCR_METFE); + + err = readx_poll_timeout_atomic(ks8851_rdreg16_par_txqcr, ks, + txqcr, !(txqcr & TXQCR_METFE), + 5, 1000000); + if (err) + ret = NETDEV_TX_BUSY; + + ks8851_done_tx(ks, skb); + } else { + ret = NETDEV_TX_BUSY; + } + + ks8851_unlock_par(ks, &flags); + + return ret; +} + +static int ks8851_probe_par(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ks8851_net_par *ksp; + struct net_device *netdev; + struct ks8851_net *ks; + int ret; + + netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_par)); + if (!netdev) + return -ENOMEM; + + ks = netdev_priv(netdev); + + ks->lock = ks8851_lock_par; + ks->unlock = ks8851_unlock_par; + ks->rdreg16 = ks8851_rdreg16_par; + ks->wrreg16 = ks8851_wrreg16_par; + ks->rdfifo = ks8851_rdfifo_par; + ks->wrfifo = ks8851_wrfifo_par; + ks->start_xmit = ks8851_start_xmit_par; + ks->rx_skb = ks8851_rx_skb_par; + +#define STD_IRQ (IRQ_LCI | /* Link Change */ \ + IRQ_RXI | /* RX done */ \ + IRQ_RXPSI) /* RX process stop */ + ks->rc_ier = STD_IRQ; + + ksp = to_ks8851_par(ks); + spin_lock_init(&ksp->lock); + + ksp->hw_addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ksp->hw_addr)) + return PTR_ERR(ksp->hw_addr); + + ksp->hw_addr_cmd = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(ksp->hw_addr_cmd)) + return PTR_ERR(ksp->hw_addr_cmd); + + ret = ks_check_endian(ks); + if (ret) + return ret; + + netdev->irq = platform_get_irq(pdev, 0); + + return ks8851_probe_common(netdev, dev, msg_enable); +} + +static int ks8851_remove_par(struct platform_device *pdev) +{ + return ks8851_remove_common(&pdev->dev); +} + +static const struct of_device_id ks8851_match_table[] = { + { .compatible = "micrel,ks8851-mll" }, + { } +}; +MODULE_DEVICE_TABLE(of, ks8851_match_table); + +static struct platform_driver ks8851_driver = { + .driver = { + .name = "ks8851", + .of_match_table = ks8851_match_table, + .pm = &ks8851_pm_ops, + }, + .probe = ks8851_probe_par, + .remove = ks8851_remove_par, +}; +module_platform_driver(ks8851_driver); + +MODULE_DESCRIPTION("KS8851 Network driver"); +MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>"); +MODULE_LICENSE("GPL"); + +module_param_named(message, msg_enable, int, 0); +MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)"); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c new file mode 100644 index 000000000000..4ec7f1615977 --- /dev/null +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* drivers/net/ethernet/micrel/ks8851.c + * + * Copyright 2009 Simtec Electronics + * http://www.simtec.co.uk/ + * Ben Dooks <ben@simtec.co.uk> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DEBUG + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/cache.h> +#include <linux/crc32.h> +#include <linux/mii.h> +#include <linux/regulator/consumer.h> + +#include <linux/spi/spi.h> +#include <linux/gpio.h> +#include <linux/of_gpio.h> +#include <linux/of_net.h> + +#include "ks8851.h" + +static int msg_enable; + +/** + * struct ks8851_net_spi - KS8851 SPI driver private data + * @lock: Lock to ensure that the device is not accessed when busy. + * @tx_work: Work queue for tx packets + * @ks8851: KS8851 driver common private data + * @spidev: The spi device we're bound to. + * @spi_msg1: pre-setup SPI transfer with one message, @spi_xfer1. + * @spi_msg2: pre-setup SPI transfer with two messages, @spi_xfer2. + * @spi_xfer1: @spi_msg1 SPI transfer structure + * @spi_xfer2: @spi_msg2 SPI transfer structure + * + * The @lock ensures that the chip is protected when certain operations are + * in progress. When the read or write packet transfer is in progress, most + * of the chip registers are not ccessible until the transfer is finished and + * the DMA has been de-asserted. + */ +struct ks8851_net_spi { + struct ks8851_net ks8851; + struct mutex lock; + struct work_struct tx_work; + struct spi_device *spidev; + struct spi_message spi_msg1; + struct spi_message spi_msg2; + struct spi_transfer spi_xfer1; + struct spi_transfer spi_xfer2[2]; +}; + +#define to_ks8851_spi(ks) container_of((ks), struct ks8851_net_spi, ks8851) + +/* SPI frame opcodes */ +#define KS_SPIOP_RD 0x00 +#define KS_SPIOP_WR 0x40 +#define KS_SPIOP_RXFIFO 0x80 +#define KS_SPIOP_TXFIFO 0xC0 + +/* shift for byte-enable data */ +#define BYTE_EN(_x) ((_x) << 2) + +/* turn register number and byte-enable mask into data for start of packet */ +#define MK_OP(_byteen, _reg) \ + (BYTE_EN(_byteen) | (_reg) << (8 + 2) | (_reg) >> 6) + +/** + * ks8851_lock_spi - register access lock + * @ks: The chip state + * @flags: Spinlock flags + * + * Claim chip register access lock + */ +static void ks8851_lock_spi(struct ks8851_net *ks, unsigned long *flags) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + + mutex_lock(&kss->lock); +} + +/** + * ks8851_unlock_spi - register access unlock + * @ks: The chip state + * @flags: Spinlock flags + * + * Release chip register access lock + */ +static void ks8851_unlock_spi(struct ks8851_net *ks, unsigned long *flags) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + + mutex_unlock(&kss->lock); +} + +/* SPI register read/write calls. + * + * All these calls issue SPI transactions to access the chip's registers. They + * all require that the necessary lock is held to prevent accesses when the + * chip is busy transferring packet data (RX/TX FIFO accesses). + */ + +/** + * ks8851_wrreg16_spi - write 16bit register value to chip via SPI + * @ks: The chip state + * @reg: The register address + * @val: The value to write + * + * Issue a write to put the value @val into the register specified in @reg. + */ +static void ks8851_wrreg16_spi(struct ks8851_net *ks, unsigned int reg, + unsigned int val) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + struct spi_transfer *xfer = &kss->spi_xfer1; + struct spi_message *msg = &kss->spi_msg1; + __le16 txb[2]; + int ret; + + txb[0] = cpu_to_le16(MK_OP(reg & 2 ? 0xC : 0x03, reg) | KS_SPIOP_WR); + txb[1] = cpu_to_le16(val); + + xfer->tx_buf = txb; + xfer->rx_buf = NULL; + xfer->len = 4; + + ret = spi_sync(kss->spidev, msg); + if (ret < 0) + netdev_err(ks->netdev, "spi_sync() failed\n"); +} + +/** + * ks8851_rdreg - issue read register command and return the data + * @ks: The device state + * @op: The register address and byte enables in message format. + * @rxb: The RX buffer to return the result into + * @rxl: The length of data expected. + * + * This is the low level read call that issues the necessary spi message(s) + * to read data from the register specified in @op. + */ +static void ks8851_rdreg(struct ks8851_net *ks, unsigned int op, + u8 *rxb, unsigned int rxl) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + struct spi_transfer *xfer; + struct spi_message *msg; + __le16 *txb = (__le16 *)ks->txd; + u8 *trx = ks->rxd; + int ret; + + txb[0] = cpu_to_le16(op | KS_SPIOP_RD); + + if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) { + msg = &kss->spi_msg2; + xfer = kss->spi_xfer2; + + xfer->tx_buf = txb; + xfer->rx_buf = NULL; + xfer->len = 2; + + xfer++; + xfer->tx_buf = NULL; + xfer->rx_buf = trx; + xfer->len = rxl; + } else { + msg = &kss->spi_msg1; + xfer = &kss->spi_xfer1; + + xfer->tx_buf = txb; + xfer->rx_buf = trx; + xfer->len = rxl + 2; + } + + ret = spi_sync(kss->spidev, msg); + if (ret < 0) + netdev_err(ks->netdev, "read: spi_sync() failed\n"); + else if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) + memcpy(rxb, trx, rxl); + else + memcpy(rxb, trx + 2, rxl); +} + +/** + * ks8851_rdreg16_spi - read 16 bit register from device via SPI + * @ks: The chip information + * @reg: The register address + * + * Read a 16bit register from the chip, returning the result + */ +static unsigned int ks8851_rdreg16_spi(struct ks8851_net *ks, unsigned int reg) +{ + __le16 rx = 0; + + ks8851_rdreg(ks, MK_OP(reg & 2 ? 0xC : 0x3, reg), (u8 *)&rx, 2); + return le16_to_cpu(rx); +} + +/** + * ks8851_rdfifo_spi - read data from the receive fifo via SPI + * @ks: The device state. + * @buff: The buffer address + * @len: The length of the data to read + * + * Issue an RXQ FIFO read command and read the @len amount of data from + * the FIFO into the buffer specified by @buff. + */ +static void ks8851_rdfifo_spi(struct ks8851_net *ks, u8 *buff, unsigned int len) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + struct spi_transfer *xfer = kss->spi_xfer2; + struct spi_message *msg = &kss->spi_msg2; + u8 txb[1]; + int ret; + + netif_dbg(ks, rx_status, ks->netdev, + "%s: %d@%p\n", __func__, len, buff); + + /* set the operation we're issuing */ + txb[0] = KS_SPIOP_RXFIFO; + + xfer->tx_buf = txb; + xfer->rx_buf = NULL; + xfer->len = 1; + + xfer++; + xfer->rx_buf = buff; + xfer->tx_buf = NULL; + xfer->len = len; + + ret = spi_sync(kss->spidev, msg); + if (ret < 0) + netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__); +} + +/** + * ks8851_wrfifo_spi - write packet to TX FIFO via SPI + * @ks: The device state. + * @txp: The sk_buff to transmit. + * @irq: IRQ on completion of the packet. + * + * Send the @txp to the chip. This means creating the relevant packet header + * specifying the length of the packet and the other information the chip + * needs, such as IRQ on completion. Send the header and the packet data to + * the device. + */ +static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp, + bool irq) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + struct spi_transfer *xfer = kss->spi_xfer2; + struct spi_message *msg = &kss->spi_msg2; + unsigned int fid = 0; + int ret; + + netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p, irq %d\n", + __func__, txp, txp->len, txp->data, irq); + + fid = ks->fid++; + fid &= TXFR_TXFID_MASK; + + if (irq) + fid |= TXFR_TXIC; /* irq on completion */ + + /* start header at txb[1] to align txw entries */ + ks->txh.txb[1] = KS_SPIOP_TXFIFO; + ks->txh.txw[1] = cpu_to_le16(fid); + ks->txh.txw[2] = cpu_to_le16(txp->len); + + xfer->tx_buf = &ks->txh.txb[1]; + xfer->rx_buf = NULL; + xfer->len = 5; + + xfer++; + xfer->tx_buf = txp->data; + xfer->rx_buf = NULL; + xfer->len = ALIGN(txp->len, 4); + + ret = spi_sync(kss->spidev, msg); + if (ret < 0) + netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__); +} + +/** + * ks8851_rx_skb_spi - receive skbuff + * @ks: The device state + * @skb: The skbuff + */ +static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) +{ + netif_rx_ni(skb); +} + +/** + * ks8851_tx_work - process tx packet(s) + * @work: The work strucutre what was scheduled. + * + * This is called when a number of packets have been scheduled for + * transmission and need to be sent to the device. + */ +static void ks8851_tx_work(struct work_struct *work) +{ + struct ks8851_net_spi *kss; + struct ks8851_net *ks; + unsigned long flags; + struct sk_buff *txb; + bool last; + + kss = container_of(work, struct ks8851_net_spi, tx_work); + ks = &kss->ks8851; + last = skb_queue_empty(&ks->txq); + + ks8851_lock_spi(ks, &flags); + + while (!last) { + txb = skb_dequeue(&ks->txq); + last = skb_queue_empty(&ks->txq); + + if (txb) { + ks8851_wrreg16_spi(ks, KS_RXQCR, + ks->rc_rxqcr | RXQCR_SDA); + ks8851_wrfifo_spi(ks, txb, last); + ks8851_wrreg16_spi(ks, KS_RXQCR, ks->rc_rxqcr); + ks8851_wrreg16_spi(ks, KS_TXQCR, TXQCR_METFE); + + ks8851_done_tx(ks, txb); + } + } + + ks8851_unlock_spi(ks, &flags); +} + +/** + * ks8851_flush_tx_work_spi - flush outstanding TX work + * @ks: The device state + */ +static void ks8851_flush_tx_work_spi(struct ks8851_net *ks) +{ + struct ks8851_net_spi *kss = to_ks8851_spi(ks); + + flush_work(&kss->tx_work); +} + +/** + * calc_txlen - calculate size of message to send packet + * @len: Length of data + * + * Returns the size of the TXFIFO message needed to send + * this packet. + */ +static unsigned int calc_txlen(unsigned int len) +{ + return ALIGN(len + 4, 4); +} + +/** + * ks8851_start_xmit_spi - transmit packet using SPI + * @skb: The buffer to transmit + * @dev: The device used to transmit the packet. + * + * Called by the network layer to transmit the @skb. Queue the packet for + * the device and schedule the necessary work to transmit the packet when + * it is free. + * + * We do this to firstly avoid sleeping with the network device locked, + * and secondly so we can round up more than one packet to transmit which + * means we can try and avoid generating too many transmit done interrupts. + */ +static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb, + struct net_device *dev) +{ + unsigned int needed = calc_txlen(skb->len); + struct ks8851_net *ks = netdev_priv(dev); + netdev_tx_t ret = NETDEV_TX_OK; + struct ks8851_net_spi *kss; + + kss = to_ks8851_spi(ks); + + netif_dbg(ks, tx_queued, ks->netdev, + "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data); + + spin_lock(&ks->statelock); + + if (needed > ks->tx_space) { + netif_stop_queue(dev); + ret = NETDEV_TX_BUSY; + } else { + ks->tx_space -= needed; + skb_queue_tail(&ks->txq, skb); + } + + spin_unlock(&ks->statelock); + schedule_work(&kss->tx_work); + + return ret; +} + +static int ks8851_probe_spi(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct ks8851_net_spi *kss; + struct net_device *netdev; + struct ks8851_net *ks; + + netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_spi)); + if (!netdev) + return -ENOMEM; + + spi->bits_per_word = 8; + + ks = netdev_priv(netdev); + + ks->lock = ks8851_lock_spi; + ks->unlock = ks8851_unlock_spi; + ks->rdreg16 = ks8851_rdreg16_spi; + ks->wrreg16 = ks8851_wrreg16_spi; + ks->rdfifo = ks8851_rdfifo_spi; + ks->wrfifo = ks8851_wrfifo_spi; + ks->start_xmit = ks8851_start_xmit_spi; + ks->rx_skb = ks8851_rx_skb_spi; + ks->flush_tx_work = ks8851_flush_tx_work_spi; + +#define STD_IRQ (IRQ_LCI | /* Link Change */ \ + IRQ_TXI | /* TX done */ \ + IRQ_RXI | /* RX done */ \ + IRQ_SPIBEI | /* SPI bus error */ \ + IRQ_TXPSI | /* TX process stop */ \ + IRQ_RXPSI) /* RX process stop */ + ks->rc_ier = STD_IRQ; + + kss = to_ks8851_spi(ks); + + kss->spidev = spi; + mutex_init(&kss->lock); + INIT_WORK(&kss->tx_work, ks8851_tx_work); + + /* initialise pre-made spi transfer messages */ + spi_message_init(&kss->spi_msg1); + spi_message_add_tail(&kss->spi_xfer1, &kss->spi_msg1); + + spi_message_init(&kss->spi_msg2); + spi_message_add_tail(&kss->spi_xfer2[0], &kss->spi_msg2); + spi_message_add_tail(&kss->spi_xfer2[1], &kss->spi_msg2); + + netdev->irq = spi->irq; + + return ks8851_probe_common(netdev, dev, msg_enable); +} + +static int ks8851_remove_spi(struct spi_device *spi) +{ + return ks8851_remove_common(&spi->dev); +} + +static const struct of_device_id ks8851_match_table[] = { + { .compatible = "micrel,ks8851" }, + { } +}; +MODULE_DEVICE_TABLE(of, ks8851_match_table); + +static struct spi_driver ks8851_driver = { + .driver = { + .name = "ks8851", + .of_match_table = ks8851_match_table, + .pm = &ks8851_pm_ops, + }, + .probe = ks8851_probe_spi, + .remove = ks8851_remove_spi, +}; +module_spi_driver(ks8851_driver); + +MODULE_DESCRIPTION("KS8851 Network driver"); +MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>"); +MODULE_LICENSE("GPL"); + +module_param_named(message, msg_enable, int, 0); +MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)"); +MODULE_ALIAS("spi:ks8851"); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index d054553c75e0..ca7032d22196 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -708,7 +708,7 @@ static int nfp_flower_sync_feature_bits(struct nfp_app *app) err = nfp_rtsym_write_le(app->pf->rtbl, "_abi_flower_balance_sync_enable", 1); if (!err) { - app_priv->flower_ext_feats |= NFP_FL_ENABLE_LAG; + app_priv->flower_en_feats |= NFP_FL_ENABLE_LAG; nfp_flower_lag_init(&app_priv->nfp_lag); } else if (err == -ENOENT) { nfp_warn(app->cpp, "LAG not supported by FW.\n"); @@ -721,7 +721,7 @@ static int nfp_flower_sync_feature_bits(struct nfp_app *app) err = nfp_rtsym_write_le(app->pf->rtbl, "_abi_flower_merge_hint_enable", 1); if (!err) { - app_priv->flower_ext_feats |= NFP_FL_ENABLE_FLOW_MERGE; + app_priv->flower_en_feats |= NFP_FL_ENABLE_FLOW_MERGE; nfp_flower_internal_port_init(app_priv); } else if (err == -ENOENT) { nfp_warn(app->cpp, @@ -840,7 +840,7 @@ static int nfp_flower_init(struct nfp_app *app) return 0; err_cleanup: - if (app_priv->flower_ext_feats & NFP_FL_ENABLE_LAG) + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) nfp_flower_lag_cleanup(&app_priv->nfp_lag); nfp_flower_metadata_cleanup(app); err_free_app_priv: diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e634e8110585..964c5e842cec 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3228,7 +3228,6 @@ reset_nic: static int efx_ef10_set_mac_address(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); - struct efx_ef10_nic_data *nic_data = efx->nic_data; bool was_enabled = efx->port_enabled; int rc; @@ -3256,6 +3255,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) #ifdef CONFIG_SFC_SRIOV if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; if (rc == -EPERM) { diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index b46f8d2ae6d7..36bd2e18f23b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -196,6 +196,19 @@ config DWMAC_SUN8I This selects Allwinner SoC glue layer support for the stmmac device driver. This driver is used for H3/A83T/A64 EMAC ethernet controller. + +config DWMAC_IMX8 + tristate "NXP IMX8 DWMAC support" + default ARCH_MXC + depends on OF && (ARCH_MXC || COMPILE_TEST) + select MFD_SYSCON + ---help--- + Support for ethernet controller on NXP i.MX8 SOCs. + + This selects NXP SoC glue layer support for the stmmac + device driver. This driver is used for i.MX8 series like + iMX8MP/iMX8DXL GMAC ethernet controller. + endif config DWMAC_INTEL diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index f9d024d6b69b..295615ab36a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o obj-$(CONFIG_DWMAC_SUN8I) += dwmac-sun8i.o obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o +obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o stmmac-platform-objs:= stmmac_platform.o dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c new file mode 100644 index 000000000000..5010af7dab4a --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dwmac-imx.c - DWMAC Specific Glue layer for NXP imx8 + * + * Copyright 2020 NXP + * + */ + +#include <linux/clk.h> +#include <linux/gpio/consumer.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_net.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/pm_wakeirq.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/stmmac.h> + +#include "stmmac_platform.h" + +#define GPR_ENET_QOS_INTF_MODE_MASK GENMASK(21, 16) +#define GPR_ENET_QOS_INTF_SEL_MII (0x0 << 16) +#define GPR_ENET_QOS_INTF_SEL_RMII (0x4 << 16) +#define GPR_ENET_QOS_INTF_SEL_RGMII (0x1 << 16) +#define GPR_ENET_QOS_CLK_GEN_EN (0x1 << 19) +#define GPR_ENET_QOS_CLK_TX_CLK_SEL (0x1 << 20) +#define GPR_ENET_QOS_RGMII_EN (0x1 << 21) + +struct imx_dwmac_ops { + u32 addr_width; + bool mac_rgmii_txclk_auto_adj; + + int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); +}; + +struct imx_priv_data { + struct device *dev; + struct clk *clk_tx; + struct clk *clk_mem; + struct regmap *intf_regmap; + u32 intf_reg_off; + bool rmii_refclk_ext; + + const struct imx_dwmac_ops *ops; + struct plat_stmmacenet_data *plat_dat; +}; + +static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) +{ + struct imx_priv_data *dwmac = plat_dat->bsp_priv; + int val; + + switch (plat_dat->interface) { + case PHY_INTERFACE_MODE_MII: + val = GPR_ENET_QOS_INTF_SEL_MII; + break; + case PHY_INTERFACE_MODE_RMII: + val = GPR_ENET_QOS_INTF_SEL_RMII; + val |= (dwmac->rmii_refclk_ext ? 0 : GPR_ENET_QOS_CLK_TX_CLK_SEL); + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + val = GPR_ENET_QOS_INTF_SEL_RGMII | + GPR_ENET_QOS_RGMII_EN; + break; + default: + pr_debug("imx dwmac doesn't support %d interface\n", + plat_dat->interface); + return -EINVAL; + } + + val |= GPR_ENET_QOS_CLK_GEN_EN; + return regmap_update_bits(dwmac->intf_regmap, dwmac->intf_reg_off, + GPR_ENET_QOS_INTF_MODE_MASK, val); +}; + +static int +imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat) +{ + int ret = 0; + + /* TBD: depends on imx8dxl scu interfaces to be upstreamed */ + return ret; +} + +static int imx_dwmac_init(struct platform_device *pdev, void *priv) +{ + struct plat_stmmacenet_data *plat_dat; + struct imx_priv_data *dwmac = priv; + int ret; + + plat_dat = dwmac->plat_dat; + + ret = clk_prepare_enable(dwmac->clk_mem); + if (ret) { + dev_err(&pdev->dev, "mem clock enable failed\n"); + return ret; + } + + ret = clk_prepare_enable(dwmac->clk_tx); + if (ret) { + dev_err(&pdev->dev, "tx clock enable failed\n"); + goto clk_tx_en_failed; + } + + if (dwmac->ops->set_intf_mode) { + ret = dwmac->ops->set_intf_mode(plat_dat); + if (ret) + goto intf_mode_failed; + } + + return 0; + +intf_mode_failed: + clk_disable_unprepare(dwmac->clk_tx); +clk_tx_en_failed: + clk_disable_unprepare(dwmac->clk_mem); + return ret; +} + +static void imx_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct imx_priv_data *dwmac = priv; + + if (dwmac->clk_tx) + clk_disable_unprepare(dwmac->clk_tx); + clk_disable_unprepare(dwmac->clk_mem); +} + +static void imx_dwmac_fix_speed(void *priv, unsigned int speed) +{ + struct plat_stmmacenet_data *plat_dat; + struct imx_priv_data *dwmac = priv; + unsigned long rate; + int err; + + plat_dat = dwmac->plat_dat; + + if (dwmac->ops->mac_rgmii_txclk_auto_adj || + (plat_dat->interface == PHY_INTERFACE_MODE_RMII) || + (plat_dat->interface == PHY_INTERFACE_MODE_MII)) + return; + + switch (speed) { + case SPEED_1000: + rate = 125000000; + break; + case SPEED_100: + rate = 25000000; + break; + case SPEED_10: + rate = 2500000; + break; + default: + dev_err(dwmac->dev, "invalid speed %u\n", speed); + return; + } + + err = clk_set_rate(dwmac->clk_tx, rate); + if (err < 0) + dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate); +} + +static int +imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev) +{ + struct device_node *np = dev->of_node; + int err = 0; + + if (of_get_property(np, "snps,rmii_refclk_ext", NULL)) + dwmac->rmii_refclk_ext = true; + + dwmac->clk_tx = devm_clk_get(dev, "tx"); + if (IS_ERR(dwmac->clk_tx)) { + dev_err(dev, "failed to get tx clock\n"); + return PTR_ERR(dwmac->clk_tx); + } + + dwmac->clk_mem = NULL; + if (of_machine_is_compatible("fsl,imx8dxl")) { + dwmac->clk_mem = devm_clk_get(dev, "mem"); + if (IS_ERR(dwmac->clk_mem)) { + dev_err(dev, "failed to get mem clock\n"); + return PTR_ERR(dwmac->clk_mem); + } + } + + if (of_machine_is_compatible("fsl,imx8mp")) { + /* Binding doc describes the propety: + is required by i.MX8MP. + is optinoal for i.MX8DXL. + */ + dwmac->intf_regmap = syscon_regmap_lookup_by_phandle(np, "intf_mode"); + if (IS_ERR(dwmac->intf_regmap)) + return PTR_ERR(dwmac->intf_regmap); + + err = of_property_read_u32_index(np, "intf_mode", 1, &dwmac->intf_reg_off); + if (err) { + dev_err(dev, "Can't get intf mode reg offset (%d)\n", err); + return err; + } + } + + return err; +} + +static int imx_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct imx_priv_data *dwmac; + const struct imx_dwmac_ops *data; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return PTR_ERR(dwmac); + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + data = of_device_get_match_data(&pdev->dev); + if (!data) { + dev_err(&pdev->dev, "failed to get match data\n"); + ret = -EINVAL; + goto err_match_data; + } + + dwmac->ops = data; + dwmac->dev = &pdev->dev; + + ret = imx_dwmac_parse_dt(dwmac, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "failed to parse OF data\n"); + goto err_parse_dt; + } + + ret = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(dwmac->ops->addr_width)); + if (ret) { + dev_err(&pdev->dev, "DMA mask set failed\n"); + goto err_dma_mask; + } + + plat_dat->init = imx_dwmac_init; + plat_dat->exit = imx_dwmac_exit; + plat_dat->fix_mac_speed = imx_dwmac_fix_speed; + plat_dat->bsp_priv = dwmac; + dwmac->plat_dat = plat_dat; + + ret = imx_dwmac_init(pdev, dwmac); + if (ret) + goto err_dwmac_init; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + goto err_drv_probe; + + return 0; + +err_dwmac_init: +err_drv_probe: + imx_dwmac_exit(pdev, plat_dat->bsp_priv); +err_dma_mask: +err_parse_dt: +err_match_data: + stmmac_remove_config_dt(pdev, plat_dat); + return ret; +} + +static struct imx_dwmac_ops imx8mp_dwmac_data = { + .addr_width = 34, + .mac_rgmii_txclk_auto_adj = false, + .set_intf_mode = imx8mp_set_intf_mode, +}; + +static struct imx_dwmac_ops imx8dxl_dwmac_data = { + .addr_width = 32, + .mac_rgmii_txclk_auto_adj = true, + .set_intf_mode = imx8dxl_set_intf_mode, +}; + +static const struct of_device_id imx_dwmac_match[] = { + { .compatible = "nxp,imx8mp-dwmac-eqos", .data = &imx8mp_dwmac_data }, + { .compatible = "nxp,imx8dxl-dwmac-eqos", .data = &imx8dxl_dwmac_data }, + { } +}; +MODULE_DEVICE_TABLE(of, imx_dwmac_match); + +static struct platform_driver imx_dwmac_driver = { + .probe = imx_dwmac_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "imx-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = imx_dwmac_match, + }, +}; +module_platform_driver(imx_dwmac_driver); + +MODULE_AUTHOR("NXP"); +MODULE_DESCRIPTION("NXP imx8 DWMAC Specific Glue layer"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index bcda49dcf619..f32317fa75c8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -507,7 +507,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) if (of_device_is_compatible(np, "snps,dwmac-4.00") || of_device_is_compatible(np, "snps,dwmac-4.10a") || - of_device_is_compatible(np, "snps,dwmac-4.20a")) { + of_device_is_compatible(np, "snps,dwmac-4.20a") || + of_device_is_compatible(np, "snps,dwmac-5.10a")) { plat->has_gmac4 = 1; plat->has_gmac = 0; plat->pmt = 1; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2c04e3b2b285..4ea226566cec 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1867,7 +1867,7 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev) */ page = phy_select_page(phydev, MII_MARVELL_VCT5_PAGE); if (page < 0) - return page; + goto restore_page; for (distance = priv->first; distance <= priv->last; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 550acf547ced..7ed0285206d0 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1977,6 +1977,10 @@ static int vsc8574_probe(struct phy_device *phydev) phydev->priv = vsc8531; + vsc8584_get_base_addr(phydev); + devm_phy_package_join(&phydev->mdio.dev, phydev, + vsc8531->base_addr, 0); + vsc8531->nleds = 4; vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES; vsc8531->hw_stats = vsc8584_hw_stats; diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index 8cdbb63d1bb0..c5a167a1c85c 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -2,7 +2,7 @@ # # Linux driver for VMware's vmxnet3 ethernet NIC. # -# Copyright (C) 2007-2016, VMware, Inc. All Rights Reserved. +# Copyright (C) 2007-2020, VMware, Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h index db9f1fde3aac..8c014c98471c 100644 --- a/drivers/net/vmxnet3/upt1_defs.h +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -92,5 +92,8 @@ enum { UPT1_F_RSS = cpu_to_le64(0x0002), UPT1_F_RXVLAN = cpu_to_le64(0x0004), /* VLAN tag stripping */ UPT1_F_LRO = cpu_to_le64(0x0008), + UPT1_F_RXINNEROFLD = cpu_to_le64(0x00010), /* Geneve/Vxlan rx csum + * offloading + */ }; #endif diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index c3a31646189f..a8d5ebd47c71 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -82,6 +82,7 @@ enum { VMXNET3_CMD_RESERVED3, VMXNET3_CMD_SET_COALESCE, VMXNET3_CMD_REGISTER_MEMREGS, + VMXNET3_CMD_SET_RSS_FIELDS, VMXNET3_CMD_FIRST_GET = 0xF00D0000, VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, @@ -96,19 +97,20 @@ enum { VMXNET3_CMD_GET_RESERVED1, VMXNET3_CMD_GET_TXDATA_DESC_SIZE, VMXNET3_CMD_GET_COALESCE, + VMXNET3_CMD_GET_RSS_FIELDS, }; /* * Little Endian layout of bitfields - * Byte 0 : 7.....len.....0 - * Byte 1 : rsvd gen 13.len.8 + * Byte 1 : oco gen 13.len.8 * Byte 2 : 5.msscof.0 ext1 dtype * Byte 3 : 13...msscof...6 * * Big Endian layout of bitfields - * Byte 0: 13...msscof...6 * Byte 1 : 5.msscof.0 ext1 dtype - * Byte 2 : rsvd gen 13.len.8 + * Byte 2 : oco gen 13.len.8 * Byte 3 : 7.....len.....0 * * Thus, le32_to_cpu on the dword will allow the big endian driver to read @@ -123,13 +125,13 @@ struct Vmxnet3_TxDesc { u32 msscof:14; /* MSS, checksum offset, flags */ u32 ext1:1; u32 dtype:1; /* descriptor type */ - u32 rsvd:1; + u32 oco:1; u32 gen:1; /* generation bit */ u32 len:14; #else u32 len:14; u32 gen:1; /* generation bit */ - u32 rsvd:1; + u32 oco:1; u32 dtype:1; /* descriptor type */ u32 ext1:1; u32 msscof:14; /* MSS, checksum offset, flags */ @@ -155,9 +157,10 @@ struct Vmxnet3_TxDesc { }; /* TxDesc.OM values */ -#define VMXNET3_OM_NONE 0 -#define VMXNET3_OM_CSUM 2 -#define VMXNET3_OM_TSO 3 +#define VMXNET3_OM_NONE 0 +#define VMXNET3_OM_ENCAP 1 +#define VMXNET3_OM_CSUM 2 +#define VMXNET3_OM_TSO 3 /* fields in TxDesc we access w/o using bit fields */ #define VMXNET3_TXD_EOP_SHIFT 12 @@ -224,6 +227,8 @@ struct Vmxnet3_RxDesc { #define VMXNET3_RXD_BTYPE_SHIFT 14 #define VMXNET3_RXD_GEN_SHIFT 31 +#define VMXNET3_RCD_HDR_INNER_SHIFT 13 + struct Vmxnet3_RxCompDesc { #ifdef __BIG_ENDIAN_BITFIELD u32 ext2:1; @@ -685,12 +690,22 @@ struct Vmxnet3_MemRegs { struct Vmxnet3_MemoryRegion memRegs[1]; }; +enum Vmxnet3_RSSField { + VMXNET3_RSS_FIELDS_TCPIP4 = 0x0001, + VMXNET3_RSS_FIELDS_TCPIP6 = 0x0002, + VMXNET3_RSS_FIELDS_UDPIP4 = 0x0004, + VMXNET3_RSS_FIELDS_UDPIP6 = 0x0008, + VMXNET3_RSS_FIELDS_ESPIP4 = 0x0010, + VMXNET3_RSS_FIELDS_ESPIP6 = 0x0020, +}; + /* If the command data <= 16 bytes, use the shared memory directly. * otherwise, use variable length configuration descriptor. */ union Vmxnet3_CmdInfo { struct Vmxnet3_VariableLenConfDesc varConf; struct Vmxnet3_SetPolling setPolling; + enum Vmxnet3_RSSField setRssFields; __le64 data[2]; }; diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 722cb054a5cd..ca395f9679d0 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -842,21 +842,46 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, u8 protocol = 0; if (ctx->mss) { /* TSO */ - ctx->eth_ip_hdr_size = skb_transport_offset(skb); - ctx->l4_hdr_size = tcp_hdrlen(skb); - ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size; + if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { + ctx->l4_offset = skb_inner_transport_offset(skb); + ctx->l4_hdr_size = inner_tcp_hdrlen(skb); + ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size; + } else { + ctx->l4_offset = skb_transport_offset(skb); + ctx->l4_hdr_size = tcp_hdrlen(skb); + ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size; + } } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb); + /* For encap packets, skb_checksum_start_offset refers + * to inner L4 offset. Thus, below works for encap as + * well as non-encap case + */ + ctx->l4_offset = skb_checksum_start_offset(skb); + + if (VMXNET3_VERSION_GE_4(adapter) && + skb->encapsulation) { + struct iphdr *iph = inner_ip_hdr(skb); - if (ctx->ipv4) { - const struct iphdr *iph = ip_hdr(skb); + if (iph->version == 4) { + protocol = iph->protocol; + } else { + const struct ipv6hdr *ipv6h; + + ipv6h = inner_ipv6_hdr(skb); + protocol = ipv6h->nexthdr; + } + } else { + if (ctx->ipv4) { + const struct iphdr *iph = ip_hdr(skb); - protocol = iph->protocol; - } else if (ctx->ipv6) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + protocol = iph->protocol; + } else if (ctx->ipv6) { + const struct ipv6hdr *ipv6h; - protocol = ipv6h->nexthdr; + ipv6h = ipv6_hdr(skb); + protocol = ipv6h->nexthdr; + } } switch (protocol) { @@ -871,10 +896,10 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, break; } - ctx->copy_size = min(ctx->eth_ip_hdr_size + + ctx->copy_size = min(ctx->l4_offset + ctx->l4_hdr_size, skb->len); } else { - ctx->eth_ip_hdr_size = 0; + ctx->l4_offset = 0; ctx->l4_hdr_size = 0; /* copy as much as allowed */ ctx->copy_size = min_t(unsigned int, @@ -930,6 +955,25 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, static void +vmxnet3_prepare_inner_tso(struct sk_buff *skb, + struct vmxnet3_tx_ctx *ctx) +{ + struct tcphdr *tcph = inner_tcp_hdr(skb); + struct iphdr *iph = inner_ip_hdr(skb); + + if (iph->version == 4) { + iph->check = 0; + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, + IPPROTO_TCP, 0); + } else { + struct ipv6hdr *iph = inner_ipv6_hdr(skb); + + tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0, + IPPROTO_TCP, 0); + } +} + +static void vmxnet3_prepare_tso(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx) { @@ -987,6 +1031,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, /* Use temporary descriptor to avoid touching bits multiple times */ union Vmxnet3_GenericDesc tempTxDesc; #endif + struct udphdr *udph; count = txd_estimate(skb); @@ -1003,7 +1048,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } tq->stats.copy_skb_header++; } - vmxnet3_prepare_tso(skb, &ctx); + if (skb->encapsulation) { + vmxnet3_prepare_inner_tso(skb, &ctx); + } else { + vmxnet3_prepare_tso(skb, &ctx); + } } else { if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) { @@ -1026,14 +1075,14 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, BUG_ON(ret <= 0 && ctx.copy_size != 0); /* hdrs parsed, check against other limits */ if (ctx.mss) { - if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size > + if (unlikely(ctx.l4_offset + ctx.l4_hdr_size > VMXNET3_MAX_TX_BUF_SIZE)) { tq->stats.drop_oversized_hdr++; goto drop_pkt; } } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (unlikely(ctx.eth_ip_hdr_size + + if (unlikely(ctx.l4_offset + skb->csum_offset > VMXNET3_MAX_CSUM_OFFSET)) { tq->stats.drop_oversized_hdr++; @@ -1080,16 +1129,34 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, #endif tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred); if (ctx.mss) { - gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_TSO; - gdesc->txd.msscof = ctx.mss; + if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { + gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; + gdesc->txd.om = VMXNET3_OM_ENCAP; + gdesc->txd.msscof = ctx.mss; + + udph = udp_hdr(skb); + if (udph->check) + gdesc->txd.oco = 1; + } else { + gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.msscof = ctx.mss; + } num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss; } else { if (skb->ip_summed == CHECKSUM_PARTIAL) { - gdesc->txd.hlen = ctx.eth_ip_hdr_size; - gdesc->txd.om = VMXNET3_OM_CSUM; - gdesc->txd.msscof = ctx.eth_ip_hdr_size + - skb->csum_offset; + if (VMXNET3_VERSION_GE_4(adapter) && + skb->encapsulation) { + gdesc->txd.hlen = ctx.l4_offset + + ctx.l4_hdr_size; + gdesc->txd.om = VMXNET3_OM_ENCAP; + gdesc->txd.msscof = 0; /* Reserved */ + } else { + gdesc->txd.hlen = ctx.l4_offset; + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.l4_offset + + skb->csum_offset; + } } else { gdesc->txd.om = 0; gdesc->txd.msscof = 0; @@ -1168,13 +1235,21 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, (le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { skb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); - BUG_ON(gdesc->rcd.frg); + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); + WARN_ON_ONCE(gdesc->rcd.frg && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & (1 << VMXNET3_RCD_TUC_SHIFT))) { skb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); - BUG_ON(gdesc->rcd.frg); + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); + WARN_ON_ONCE(gdesc->rcd.frg && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); } else { if (gdesc->rcd.csum) { skb->csum = htons(gdesc->rcd.csum); @@ -2429,6 +2504,10 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) devRead->misc.uptFeatures |= UPT1_F_RXVLAN; + if (adapter->netdev->features & (NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM)) + devRead->misc.uptFeatures |= UPT1_F_RXINNEROFLD; + devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu); devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa); devRead->misc.queueDescLen = cpu_to_le32( @@ -2554,6 +2633,39 @@ vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter) spin_unlock_irqrestore(&adapter->cmd_lock, flags); } +static void +vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_4(adapter)) + return; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + + if (adapter->default_rss_fields) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_RSS_FIELDS); + adapter->rss_fields = + VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } else { + cmdInfo->setRssFields = adapter->rss_fields; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RSS_FIELDS); + /* Not all requested RSS may get applied, so get and + * cache what was actually applied. + */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_RSS_FIELDS); + adapter->rss_fields = + VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } + + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} + int vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) { @@ -2603,6 +2715,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) } vmxnet3_init_coalesce(adapter); + vmxnet3_init_rssfields(adapter); for (i = 0; i < adapter->num_rx_queues; i++) { VMXNET3_WRITE_BAR0_REG(adapter, @@ -3039,6 +3152,18 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_LRO; + + if (VMXNET3_VERSION_GE_4(adapter)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->hw_enc_features = NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + if (dma64) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->vlan_features = netdev->hw_features & @@ -3382,7 +3507,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_pci; ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & (1 << VMXNET3_REV_3)) { + if (ver & (1 << VMXNET3_REV_4)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_4); + adapter->version = VMXNET3_REV_4 + 1; + } else if (ver & (1 << VMXNET3_REV_3)) { VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << VMXNET3_REV_3); @@ -3430,6 +3560,11 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->default_coal_mode = true; } + if (VMXNET3_VERSION_GE_4(adapter)) { + adapter->default_rss_fields = true; + adapter->rss_fields = VMXNET3_RSS_FIELDS_DEFAULT; + } + SET_NETDEV_DEV(netdev, &pdev->dev); vmxnet3_declare_features(adapter, dma64); diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 6528940ce5f3..bfdda0f34b97 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -267,14 +267,43 @@ netdev_features_t vmxnet3_fix_features(struct net_device *netdev, return features; } +static void vmxnet3_enable_encap_offloads(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + if (VMXNET3_VERSION_GE_4(adapter)) { + netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + } +} + +static void vmxnet3_disable_encap_offloads(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + if (VMXNET3_VERSION_GE_4(adapter)) { + netdev->hw_enc_features &= ~(NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM); + } +} + int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); unsigned long flags; netdev_features_t changed = features ^ netdev->features; + netdev_features_t tun_offload_mask = NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + u8 udp_tun_enabled = (netdev->features & tun_offload_mask) != 0; if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | - NETIF_F_HW_VLAN_CTAG_RX)) { + NETIF_F_HW_VLAN_CTAG_RX | tun_offload_mask)) { if (features & NETIF_F_RXCSUM) adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXCSUM; @@ -297,6 +326,17 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) adapter->shared->devRead.misc.uptFeatures &= ~UPT1_F_RXVLAN; + if ((features & tun_offload_mask) != 0 && !udp_tun_enabled) { + vmxnet3_enable_encap_offloads(netdev); + adapter->shared->devRead.misc.uptFeatures |= + UPT1_F_RXINNEROFLD; + } else if ((features & tun_offload_mask) == 0 && + udp_tun_enabled) { + vmxnet3_disable_encap_offloads(netdev); + adapter->shared->devRead.misc.uptFeatures &= + ~UPT1_F_RXINNEROFLD; + } + spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_FEATURE); @@ -665,18 +705,232 @@ out: return err; } +static int +vmxnet3_get_rss_hash_opts(struct vmxnet3_adapter *adapter, + struct ethtool_rxnfc *info) +{ + enum Vmxnet3_RSSField rss_fields; + + if (netif_running(adapter->netdev)) { + unsigned long flags; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_RSS_FIELDS); + rss_fields = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } else { + rss_fields = adapter->rss_fields; + } + + info->data = 0; + + /* Report default options for RSS on vmxnet3 */ + switch (info->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3 | + RXH_IP_SRC | RXH_IP_DST; + break; + case UDP_V4_FLOW: + if (rss_fields & VMXNET3_RSS_FIELDS_UDPIP4) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + if (rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + /* fallthrough */ + case SCTP_V4_FLOW: + case IPV4_FLOW: + info->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case UDP_V6_FLOW: + if (rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V6_FLOW: + case IPV6_FLOW: + info->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +vmxnet3_set_rss_hash_opt(struct net_device *netdev, + struct vmxnet3_adapter *adapter, + struct ethtool_rxnfc *nfc) +{ + enum Vmxnet3_RSSField rss_fields = adapter->rss_fields; + + /* RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + !(nfc->data & RXH_L4_B_0_1) || + !(nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + case UDP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_fields &= ~VMXNET3_RSS_FIELDS_UDPIP4; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_fields |= VMXNET3_RSS_FIELDS_UDPIP4; + break; + default: + return -EINVAL; + } + break; + case UDP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_fields &= ~VMXNET3_RSS_FIELDS_UDPIP6; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_fields |= VMXNET3_RSS_FIELDS_UDPIP6; + break; + default: + return -EINVAL; + } + break; + case ESP_V4_FLOW: + case AH_V4_FLOW: + case AH_ESP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_fields &= ~VMXNET3_RSS_FIELDS_ESPIP4; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_fields |= VMXNET3_RSS_FIELDS_ESPIP4; + break; + default: + return -EINVAL; + } + break; + case ESP_V6_FLOW: + case AH_V6_FLOW: + case AH_ESP_V6_FLOW: + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + (nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* if we changed something we need to update flags */ + if (rss_fields != adapter->rss_fields) { + adapter->default_rss_fields = false; + if (netif_running(netdev)) { + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + spin_lock_irqsave(&adapter->cmd_lock, flags); + cmdInfo->setRssFields = rss_fields; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RSS_FIELDS); + + /* Not all requested RSS may get applied, so get and + * cache what was actually applied. + */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_RSS_FIELDS); + adapter->rss_fields = + VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } else { + /* When the device is activated, we will try to apply + * these rules and cache the applied value later. + */ + adapter->rss_fields = rss_fields; + } + } + return 0; +} static int vmxnet3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, u32 *rules) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int err = 0; + switch (info->cmd) { case ETHTOOL_GRXRINGS: info->data = adapter->num_rx_queues; - return 0; + break; + case ETHTOOL_GRXFH: + if (!VMXNET3_VERSION_GE_4(adapter)) { + err = -EOPNOTSUPP; + break; + } + err = vmxnet3_get_rss_hash_opts(adapter, info); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int +vmxnet3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int err = 0; + + if (!VMXNET3_VERSION_GE_4(adapter)) { + err = -EOPNOTSUPP; + goto done; + } + + switch (info->cmd) { + case ETHTOOL_SRXFH: + err = vmxnet3_set_rss_hash_opt(netdev, adapter, info); + break; + default: + err = -EOPNOTSUPP; + break; } - return -EOPNOTSUPP; + +done: + return err; } #ifdef VMXNET3_RSS @@ -887,6 +1141,7 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = { .get_ringparam = vmxnet3_get_ringparam, .set_ringparam = vmxnet3_set_ringparam, .get_rxnfc = vmxnet3_get_rxnfc, + .set_rxnfc = vmxnet3_set_rxnfc, #ifdef VMXNET3_RSS .get_rxfh_indir_size = vmxnet3_get_rss_indir_size, .get_rxfh = vmxnet3_get_rss, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 1cc1cd4aaa59..5d2b062215a2 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -69,18 +69,19 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.17.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.5.0.0-k" /* Each byte of this 32-bit integer encodes a version number in * VMXNET3_DRIVER_VERSION_STRING. */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01041100 +#define VMXNET3_DRIVER_VERSION_NUM 0x01050000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ #define VMXNET3_RSS #endif +#define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ #define VMXNET3_REV_3 2 /* Vmxnet3 Rev. 3 */ #define VMXNET3_REV_2 1 /* Vmxnet3 Rev. 2 */ #define VMXNET3_REV_1 0 /* Vmxnet3 Rev. 1 */ @@ -218,10 +219,16 @@ struct vmxnet3_tx_ctx { bool ipv4; bool ipv6; u16 mss; - u32 eth_ip_hdr_size; /* only valid for pkts requesting tso or csum - * offloading + u32 l4_offset; /* only valid for pkts requesting tso or csum + * offloading. For encap offload, it refers to + * inner L4 offset i.e. it includes outer header + * encap header and inner eth and ip header size + */ + + u32 l4_hdr_size; /* only valid if mss != 0 + * Refers to inner L4 hdr size for encap + * offload */ - u32 l4_hdr_size; /* only valid if mss != 0 */ u32 copy_size; /* # of bytes copied into the data ring */ union Vmxnet3_GenericDesc *sop_txd; union Vmxnet3_GenericDesc *eop_txd; @@ -376,6 +383,8 @@ struct vmxnet3_adapter { u16 rxdata_desc_size; bool rxdataring_enabled; + bool default_rss_fields; + enum Vmxnet3_RSSField rss_fields; struct work_struct work; @@ -412,6 +421,8 @@ struct vmxnet3_adapter { (adapter->version >= VMXNET3_REV_2 + 1) #define VMXNET3_VERSION_GE_3(adapter) \ (adapter->version >= VMXNET3_REV_3 + 1) +#define VMXNET3_VERSION_GE_4(adapter) \ + (adapter->version >= VMXNET3_REV_4 + 1) /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 @@ -435,6 +446,8 @@ struct vmxnet3_adapter { #define VMXNET3_COAL_RBC_RATE(usecs) (1000000 / usecs) #define VMXNET3_COAL_RBC_USECS(rbc_rate) (1000000 / rbc_rate) +#define VMXNET3_RSS_FIELDS_DEFAULT (VMXNET3_RSS_FIELDS_TCPIP4 | \ + VMXNET3_RSS_FIELDS_TCPIP6) int vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c15a92163c1f..4c972d8abf31 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1313,8 +1313,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; - struct linger sol = { .l_onoff = 1, .l_linger = 0 }; - int ret, opt, rcv_pdu_size; + int ret, rcv_pdu_size; queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); @@ -1337,60 +1336,24 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } /* Single syn retry */ - opt = 1; - ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT, - (char *)&opt, sizeof(opt)); - if (ret) { - dev_err(nctrl->device, - "failed to set TCP_SYNCNT sock opt %d\n", ret); - goto err_sock; - } + tcp_sock_set_syncnt(queue->sock->sk, 1); /* Set TCP no delay */ - opt = 1; - ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, - TCP_NODELAY, (char *)&opt, sizeof(opt)); - if (ret) { - dev_err(nctrl->device, - "failed to set TCP_NODELAY sock opt %d\n", ret); - goto err_sock; - } + tcp_sock_set_nodelay(queue->sock->sk); /* * Cleanup whatever is sitting in the TCP transmit queue on socket * close. This is done to prevent stale data from being sent should * the network connection be restored before TCP times out. */ - ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER, - (char *)&sol, sizeof(sol)); - if (ret) { - dev_err(nctrl->device, - "failed to set SO_LINGER sock opt %d\n", ret); - goto err_sock; - } + sock_no_linger(queue->sock->sk); - if (so_priority > 0) { - ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY, - (char *)&so_priority, sizeof(so_priority)); - if (ret) { - dev_err(ctrl->ctrl.device, - "failed to set SO_PRIORITY sock opt, ret %d\n", - ret); - goto err_sock; - } - } + if (so_priority > 0) + sock_set_priority(queue->sock->sk, so_priority); /* Set socket type of service */ - if (nctrl->opts->tos >= 0) { - opt = nctrl->opts->tos; - ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS, - (char *)&opt, sizeof(opt)); - if (ret) { - dev_err(nctrl->device, - "failed to set IP_TOS sock opt %d\n", ret); - goto err_sock; - } - } + if (nctrl->opts->tos >= 0) + ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos); queue->sock->sk->sk_allocation = GFP_ATOMIC; nvme_tcp_set_queue_io_cpu(queue); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f0da04e960f4..4546049a96b3 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1429,7 +1429,6 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; struct inet_sock *inet = inet_sk(sock->sk); - struct linger sol = { .l_onoff = 1, .l_linger = 0 }; int ret; ret = kernel_getsockname(sock, @@ -1447,27 +1446,14 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) * close. This is done to prevent stale data from being sent should * the network connection be restored before TCP times out. */ - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&sol, sizeof(sol)); - if (ret) - return ret; + sock_no_linger(sock->sk); - if (so_priority > 0) { - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY, - (char *)&so_priority, sizeof(so_priority)); - if (ret) - return ret; - } + if (so_priority > 0) + sock_set_priority(sock->sk, so_priority); /* Set socket type of service */ - if (inet->rcv_tos > 0) { - int tos = inet->rcv_tos; - - ret = kernel_setsockopt(sock, SOL_IP, IP_TOS, - (char *)&tos, sizeof(tos)); - if (ret) - return ret; - } + if (inet->rcv_tos > 0) + ip_sock_set_tos(sock->sk, inet->rcv_tos); write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = queue; @@ -1588,7 +1574,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) { struct nvmet_tcp_port *port; __kernel_sa_family_t af; - int opt, ret; + int ret; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) @@ -1632,30 +1618,10 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) port->sock->sk->sk_user_data = port; port->data_ready = port->sock->sk->sk_data_ready; port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready; - - opt = 1; - ret = kernel_setsockopt(port->sock, IPPROTO_TCP, - TCP_NODELAY, (char *)&opt, sizeof(opt)); - if (ret) { - pr_err("failed to set TCP_NODELAY sock opt %d\n", ret); - goto err_sock; - } - - ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&opt, sizeof(opt)); - if (ret) { - pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret); - goto err_sock; - } - - if (so_priority > 0) { - ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY, - (char *)&so_priority, sizeof(so_priority)); - if (ret) { - pr_err("failed to set SO_PRIORITY sock opt %d\n", ret); - goto err_sock; - } - } + sock_set_reuseaddr(port->sock->sk); + tcp_sock_set_nodelay(port->sock->sk); + if (so_priority > 0) + sock_set_priority(port->sock->sk, so_priority); ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, sizeof(port->addr)); diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 1f93ea381353..922484ea4e30 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config ISCSI_TARGET tristate "Linux-iSCSI.org iSCSI Target Mode Stack" - depends on NET + depends on INET select CRYPTO select CRYPTO_CRC32C select CRYPTO_CRC32C_INTEL if X86 diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 731ee67fe914..85748e338858 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -15,6 +15,7 @@ #include <linux/sched/signal.h> #include <linux/idr.h> #include <linux/tcp.h> /* TCP_NODELAY */ +#include <net/ip.h> #include <net/ipv6.h> /* ipv6_addr_v4mapped() */ #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> @@ -855,7 +856,7 @@ int iscsit_setup_np( struct sockaddr_storage *sockaddr) { struct socket *sock = NULL; - int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len; + int backlog = ISCSIT_TCP_BACKLOG, ret, len; switch (np->np_network_transport) { case ISCSI_TCP: @@ -897,34 +898,10 @@ int iscsit_setup_np( /* * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY. */ - /* FIXME: Someone please explain why this is endian-safe */ - opt = 1; - if (np->np_network_transport == ISCSI_TCP) { - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - pr_err("kernel_setsockopt() for TCP_NODELAY" - " failed: %d\n", ret); - goto fail; - } - } - - /* FIXME: Someone please explain why this is endian-safe */ - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - pr_err("kernel_setsockopt() for SO_REUSEADDR" - " failed\n"); - goto fail; - } - - ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - pr_err("kernel_setsockopt() for IP_FREEBIND" - " failed\n"); - goto fail; - } + if (np->np_network_transport == ISCSI_TCP) + tcp_sock_set_nodelay(sock->sk); + sock_set_reuseaddr(sock->sk); + ip_sock_set_freebind(sock->sk); ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len); if (ret < 0) { diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 1ecc67da6c1a..e313dae01674 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -37,7 +37,6 @@ int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; - unsigned int min_level; int ret; _enter(""); @@ -57,9 +56,8 @@ int afs_open_socket(struct afs_net *net) srx.transport.sin6.sin6_family = AF_INET6; srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); - min_level = RXRPC_SECURITY_ENCRYPT; - ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, - (void *)&min_level, sizeof(min_level)); + ret = rxrpc_sock_set_min_security_level(socket->sk, + RXRPC_SECURITY_ENCRYPT); if (ret < 0) goto error_2; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 28268ed461b8..ad8fb53b3682 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3929,14 +3929,8 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_rcvbuf = 140 * 1024; } - if (server->tcp_nodelay) { - int val = 1; - rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - if (rc) - cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n", - rc); - } + if (server->tcp_nodelay) + tcp_sock_set_nodelay(socket->sk); cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n", socket->sk->sk_sndbuf, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c97570eb2c18..99760063e000 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -325,7 +325,6 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, size_t total_len = 0, sent, size; struct socket *ssocket = server->ssocket; struct msghdr smb_msg; - int val = 1; __be32 rfc1002_marker; if (cifs_rdma_enabled(server)) { @@ -345,8 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, } /* cork the socket */ - kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, - (char *)&val, sizeof(val)); + tcp_sock_set_cork(ssocket->sk, true); for (j = 0; j < num_rqst; j++) send_length += smb_rqst_len(server, &rqst[j]); @@ -435,9 +433,7 @@ unmask: } /* uncork it */ - val = 0; - kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, - (char *)&val, sizeof(val)); + tcp_sock_set_cork(ssocket->sk, false); if ((total_len > 0) && (total_len != send_length)) { cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n", diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index cdfaf4f0e11a..3543a8fec907 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -724,7 +724,7 @@ out_close: } /* Listening socket is busy, accept a connection */ -static int tcp_accept_from_sock(struct connection *con) +static int accept_from_sock(struct connection *con) { int result; struct sockaddr_storage peeraddr; @@ -852,123 +852,6 @@ accept_err: return result; } -static int sctp_accept_from_sock(struct connection *con) -{ - /* Check that the new node is in the lockspace */ - struct sctp_prim prim; - int nodeid; - int prim_len, ret; - int addr_len; - struct connection *newcon; - struct connection *addcon; - struct socket *newsock; - - mutex_lock(&connections_lock); - if (!dlm_allow_conn) { - mutex_unlock(&connections_lock); - return -1; - } - mutex_unlock(&connections_lock); - - mutex_lock_nested(&con->sock_mutex, 0); - - ret = kernel_accept(con->sock, &newsock, O_NONBLOCK); - if (ret < 0) - goto accept_err; - - memset(&prim, 0, sizeof(struct sctp_prim)); - prim_len = sizeof(struct sctp_prim); - - ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR, - (char *)&prim, &prim_len); - if (ret < 0) { - log_print("getsockopt/sctp_primary_addr failed: %d", ret); - goto accept_err; - } - - make_sockaddr(&prim.ssp_addr, 0, &addr_len); - ret = addr_to_nodeid(&prim.ssp_addr, &nodeid); - if (ret) { - unsigned char *b = (unsigned char *)&prim.ssp_addr; - - log_print("reject connect from unknown addr"); - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, - b, sizeof(struct sockaddr_storage)); - goto accept_err; - } - - newcon = nodeid2con(nodeid, GFP_NOFS); - if (!newcon) { - ret = -ENOMEM; - goto accept_err; - } - - mutex_lock_nested(&newcon->sock_mutex, 1); - - if (newcon->sock) { - struct connection *othercon = newcon->othercon; - - if (!othercon) { - othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); - if (!othercon) { - log_print("failed to allocate incoming socket"); - mutex_unlock(&newcon->sock_mutex); - ret = -ENOMEM; - goto accept_err; - } - othercon->nodeid = nodeid; - othercon->rx_action = receive_from_sock; - mutex_init(&othercon->sock_mutex); - INIT_LIST_HEAD(&othercon->writequeue); - spin_lock_init(&othercon->writequeue_lock); - INIT_WORK(&othercon->swork, process_send_sockets); - INIT_WORK(&othercon->rwork, process_recv_sockets); - set_bit(CF_IS_OTHERCON, &othercon->flags); - } - mutex_lock_nested(&othercon->sock_mutex, 2); - if (!othercon->sock) { - newcon->othercon = othercon; - add_sock(newsock, othercon); - addcon = othercon; - mutex_unlock(&othercon->sock_mutex); - } else { - printk("Extra connection from node %d attempted\n", nodeid); - ret = -EAGAIN; - mutex_unlock(&othercon->sock_mutex); - mutex_unlock(&newcon->sock_mutex); - goto accept_err; - } - } else { - newcon->rx_action = receive_from_sock; - add_sock(newsock, newcon); - addcon = newcon; - } - - log_print("connected to %d", nodeid); - - mutex_unlock(&newcon->sock_mutex); - - /* - * Add it to the active queue in case we got data - * between processing the accept adding the socket - * to the read_sockets list - */ - if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) - queue_work(recv_workqueue, &addcon->rwork); - mutex_unlock(&con->sock_mutex); - - return 0; - -accept_err: - mutex_unlock(&con->sock_mutex); - if (newsock) - sock_release(newsock); - if (ret != -EAGAIN) - log_print("error accepting connection from node: %d", ret); - - return ret; -} - static void free_entry(struct writequeue_entry *e) { __free_page(e->page); @@ -999,6 +882,7 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed) static int sctp_bind_addrs(struct connection *con, uint16_t port) { struct sockaddr_storage localaddr; + struct sockaddr *addr = (struct sockaddr *)&localaddr; int i, addr_len, result = 0; for (i = 0; i < dlm_local_count; i++) { @@ -1006,13 +890,9 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port) make_sockaddr(&localaddr, port, &addr_len); if (!i) - result = kernel_bind(con->sock, - (struct sockaddr *)&localaddr, - addr_len); + result = kernel_bind(con->sock, addr, addr_len); else - result = kernel_setsockopt(con->sock, SOL_SCTP, - SCTP_SOCKOPT_BINDX_ADD, - (char *)&localaddr, addr_len); + result = sock_bind_add(con->sock->sk, addr, addr_len); if (result < 0) { log_print("Can't bind to %d addr number %d, %d.\n", @@ -1031,11 +911,9 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port) static void sctp_connect_to_sock(struct connection *con) { struct sockaddr_storage daddr; - int one = 1; int result; int addr_len; struct socket *sock; - struct __kernel_sock_timeval tv = { .tv_sec = 5, .tv_usec = 0 }; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -1079,21 +957,17 @@ static void sctp_connect_to_sock(struct connection *con) log_print("connecting to %d", con->nodeid); /* Turn off Nagle's algorithm */ - kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, - sizeof(one)); + sctp_sock_set_nodelay(sock->sk); /* * Make sock->ops->connect() function return in specified time, * since O_NONBLOCK argument in connect() function does not work here, * then, we should restore the default value of this attribute. */ - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv, - sizeof(tv)); + sock_set_sndtimeo(sock->sk, 5); result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len, 0); - memset(&tv, 0, sizeof(tv)); - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv, - sizeof(tv)); + sock_set_sndtimeo(sock->sk, 0); if (result == -EINPROGRESS) result = 0; @@ -1132,7 +1006,6 @@ static void tcp_connect_to_sock(struct connection *con) struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; - int one = 1; int result; if (con->nodeid == 0) { @@ -1181,8 +1054,7 @@ static void tcp_connect_to_sock(struct connection *con) log_print("connecting to %d", con->nodeid); /* Turn off Nagle's algorithm */ - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, - sizeof(one)); + tcp_sock_set_nodelay(sock->sk); result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, O_NONBLOCK); @@ -1224,7 +1096,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con, { struct socket *sock = NULL; int result = 0; - int one = 1; int addr_len; if (dlm_local_addr[0]->ss_family == AF_INET) @@ -1241,19 +1112,14 @@ static struct socket *tcp_create_listen_sock(struct connection *con, } /* Turn off Nagle's algorithm */ - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, - sizeof(one)); + tcp_sock_set_nodelay(sock->sk); - result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&one, sizeof(one)); + sock_set_reuseaddr(sock->sk); - if (result < 0) { - log_print("Failed to set SO_REUSEADDR on socket: %d", result); - } write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = con; save_listen_callbacks(sock); - con->rx_action = tcp_accept_from_sock; + con->rx_action = accept_from_sock; con->connect_action = tcp_connect_to_sock; write_unlock_bh(&sock->sk->sk_callback_lock); @@ -1267,11 +1133,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con, con->sock = NULL; goto create_out; } - result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&one, sizeof(one)); - if (result < 0) { - log_print("Set keepalive failed: %d", result); - } + sock_set_keepalive(sock->sk); result = sock->ops->listen(sock, 5); if (result < 0) { @@ -1309,8 +1171,6 @@ static int sctp_listen_for_all(void) struct socket *sock = NULL; int result = -EINVAL; struct connection *con = nodeid2con(0, GFP_NOFS); - int bufsize = NEEDED_RMEM; - int one = 1; if (!con) return -ENOMEM; @@ -1324,15 +1184,8 @@ static int sctp_listen_for_all(void) goto out; } - result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE, - (char *)&bufsize, sizeof(bufsize)); - if (result) - log_print("Error increasing buffer space on socket %d", result); - - result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, - sizeof(one)); - if (result < 0) - log_print("Could not set SCTP NODELAY error %d\n", result); + sock_set_rcvbuf(sock->sk, NEEDED_RMEM); + sctp_sock_set_nodelay(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); /* Init con struct */ @@ -1340,7 +1193,7 @@ static int sctp_listen_for_all(void) save_listen_callbacks(sock); con->sock = sock; con->sock->sk->sk_data_ready = lowcomms_data_ready; - con->rx_action = sctp_accept_from_sock; + con->rx_action = accept_from_sock; con->connect_action = sctp_connect_to_sock; write_unlock_bh(&sock->sk->sk_callback_lock); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2c512b40a940..79a231719460 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1441,22 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work) sc_put(sc); } -static int o2net_set_nodelay(struct socket *sock) -{ - int val = 1; - - return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (void *)&val, sizeof(val)); -} - -static int o2net_set_usertimeout(struct socket *sock) -{ - int user_timeout = O2NET_TCP_USER_TIMEOUT; - - return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (void *)&user_timeout, sizeof(user_timeout)); -} - static void o2net_initialize_handshake(void) { o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( @@ -1636,17 +1620,8 @@ static void o2net_start_connect(struct work_struct *work) goto out; } - ret = o2net_set_nodelay(sc->sc_sock); - if (ret) { - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); - goto out; - } - - ret = o2net_set_usertimeout(sock); - if (ret) { - mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); - goto out; - } + tcp_sock_set_nodelay(sc->sc_sock->sk); + tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT); o2net_register_callbacks(sc->sc_sock->sk, sc); @@ -1832,17 +1807,8 @@ static int o2net_accept_one(struct socket *sock, int *more) *more = 1; new_sock->sk->sk_allocation = GFP_ATOMIC; - ret = o2net_set_nodelay(new_sock); - if (ret) { - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); - goto out; - } - - ret = o2net_set_usertimeout(new_sock); - if (ret) { - mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); - goto out; - } + tcp_sock_set_nodelay(new_sock->sk); + tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT); ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1); if (ret < 0) diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index b919d143a9a6..96ebaa94a92e 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -76,7 +76,7 @@ struct aes_gcm_keymat { struct mlx5_accel_esp_xfrm_attrs { enum mlx5_accel_esp_action action; u32 esn; - u32 spi; + __be32 spi; u32 seq; u32 tfc_pad; u32 flags; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 405a7ddd406c..13c0e4556eda 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -201,7 +201,7 @@ struct mlx5_rsc_debug { void *object; enum dbg_rsc_type type; struct dentry *root; - struct mlx5_field_desc fields[0]; + struct mlx5_field_desc fields[]; }; enum mlx5_dev_event { @@ -298,7 +298,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; + struct mlx5_cmd_stats *stats; }; struct mlx5_port_caps { @@ -832,11 +832,6 @@ static inline u16 fw_rev_sub(struct mlx5_core_dev *dev) return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff; } -static inline u16 cmdif_rev(struct mlx5_core_dev *dev) -{ - return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; -} - static inline u32 mlx5_base_mkey(const u32 key) { return key & 0xffffff00u; @@ -1028,11 +1023,6 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, const u8 *mac, bool vlan, u16 vlan_id, u8 port_num); -static inline int fw_initializing(struct mlx5_core_dev *dev) -{ - return ioread32be(&dev->iseg->initializing) >> 31; -} - static inline u32 mlx5_mkey_to_idx(u32 mkey) { return mkey >> 8; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e2d13e074067..6c5aa0a21425 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, MLX5_FLOW_CONTEXT_ACTION_DECRYPT = 1 << 18, + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS = 1 << 19, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fb243848132d..1a56dc079c32 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -584,9 +584,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 metadata_reg_b[0x20]; - - u8 reserved_at_1c0[0x40]; + u8 reserved_at_1a0[0x60]; }; struct mlx5_ifc_fte_match_set_misc3_bits { @@ -1210,7 +1208,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_99[0x2]; u8 log_max_qp[0x5]; - u8 reserved_at_a0[0xb]; + u8 reserved_at_a0[0x3]; + u8 ece_support[0x1]; + u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; u8 reserved_at_b0[0x10]; @@ -1703,7 +1703,7 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_140[0x4c0]; - struct mlx5_ifc_cmd_pas_bits pas[0]; + struct mlx5_ifc_cmd_pas_bits pas[]; }; struct mlx5_ifc_rq_num_bits { @@ -1921,7 +1921,7 @@ struct mlx5_ifc_resource_dump_menu_segment_bits { u8 reserved_at_20[0x10]; u8 num_of_records[0x10]; - struct mlx5_ifc_resource_dump_menu_record_bits record[0]; + struct mlx5_ifc_resource_dump_menu_record_bits record[]; }; struct mlx5_ifc_resource_dump_resource_segment_bits { @@ -1933,7 +1933,7 @@ struct mlx5_ifc_resource_dump_resource_segment_bits { u8 index2[0x20]; - u8 payload[0][0x20]; + u8 payload[][0x20]; }; struct mlx5_ifc_resource_dump_terminate_segment_bits { @@ -3010,7 +3010,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_1200[0x600]; - union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[0]; + union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; }; enum { @@ -3303,7 +3303,7 @@ struct mlx5_ifc_rqtc_bits { u8 reserved_at_e0[0x6a0]; - struct mlx5_ifc_rq_num_bits rq_num[0]; + struct mlx5_ifc_rq_num_bits rq_num[]; }; enum { @@ -3415,7 +3415,7 @@ struct mlx5_ifc_nic_vport_context_bits { u8 reserved_at_7e0[0x20]; - u8 current_uc_mac_address[0][0x40]; + u8 current_uc_mac_address[][0x40]; }; enum { @@ -4218,7 +4218,8 @@ struct mlx5_ifc_rts2rts_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rts2rts_qp_in_bits { @@ -4235,7 +4236,7 @@ struct mlx5_ifc_rts2rts_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -4248,7 +4249,8 @@ struct mlx5_ifc_rtr2rts_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rtr2rts_qp_in_bits { @@ -4265,7 +4267,7 @@ struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -4338,7 +4340,7 @@ struct mlx5_ifc_query_xrc_srq_out_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_xrc_srq_in_bits { @@ -4616,7 +4618,7 @@ struct mlx5_ifc_query_srq_out_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_srq_in_bits { @@ -4817,7 +4819,8 @@ struct mlx5_ifc_query_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; u8 opt_param_mask[0x20]; @@ -4827,7 +4830,7 @@ struct mlx5_ifc_query_qp_out_bits { u8 reserved_at_800[0x80]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_qp_in_bits { @@ -5160,7 +5163,7 @@ struct mlx5_ifc_query_hca_vport_pkey_out_bits { u8 reserved_at_40[0x40]; - struct mlx5_ifc_pkey_bits pkey[0]; + struct mlx5_ifc_pkey_bits pkey[]; }; struct mlx5_ifc_query_hca_vport_pkey_in_bits { @@ -5196,7 +5199,7 @@ struct mlx5_ifc_query_hca_vport_gid_out_bits { u8 gids_num[0x10]; u8 reserved_at_70[0x10]; - struct mlx5_ifc_array128_auto_bits gid[0]; + struct mlx5_ifc_array128_auto_bits gid[]; }; struct mlx5_ifc_query_hca_vport_gid_in_bits { @@ -5464,7 +5467,7 @@ struct mlx5_ifc_query_flow_counter_out_bits { u8 reserved_at_40[0x40]; - struct mlx5_ifc_traffic_counter_bits flow_statistics[0]; + struct mlx5_ifc_traffic_counter_bits flow_statistics[]; }; struct mlx5_ifc_query_flow_counter_in_bits { @@ -5558,7 +5561,7 @@ struct mlx5_ifc_query_eq_out_bits { u8 reserved_at_300[0x580]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_eq_in_bits { @@ -5583,7 +5586,7 @@ struct mlx5_ifc_packet_reformat_context_in_bits { u8 reserved_at_20[0x10]; u8 reformat_data[2][0x8]; - u8 more_reformat_data[0][0x8]; + u8 more_reformat_data[][0x8]; }; struct mlx5_ifc_query_packet_reformat_context_out_bits { @@ -5594,7 +5597,7 @@ struct mlx5_ifc_query_packet_reformat_context_out_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[]; }; struct mlx5_ifc_query_packet_reformat_context_in_bits { @@ -5833,7 +5836,7 @@ struct mlx5_ifc_query_cq_out_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_cq_in_bits { @@ -6440,7 +6443,7 @@ struct mlx5_ifc_modify_cq_in_bits { u8 reserved_at_300[0x580]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_modify_cong_status_out_bits { @@ -6504,7 +6507,7 @@ struct mlx5_ifc_manage_pages_out_bits { u8 reserved_at_60[0x20]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; enum { @@ -6526,7 +6529,7 @@ struct mlx5_ifc_manage_pages_in_bits { u8 input_num_entries[0x20]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_mad_ifc_out_bits { @@ -6582,7 +6585,8 @@ struct mlx5_ifc_init2rtr_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_init2rtr_qp_in_bits { @@ -6599,7 +6603,7 @@ struct mlx5_ifc_init2rtr_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -7481,7 +7485,7 @@ struct mlx5_ifc_create_xrc_srq_in_bits { u8 reserved_at_300[0x580]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_tis_out_bits { @@ -7557,7 +7561,7 @@ struct mlx5_ifc_create_srq_in_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_sq_out_bits { @@ -7695,7 +7699,7 @@ struct mlx5_ifc_create_qp_out_bits { u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_at_60[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_create_qp_in_bits { @@ -7709,7 +7713,7 @@ struct mlx5_ifc_create_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -7718,7 +7722,7 @@ struct mlx5_ifc_create_qp_in_bits { u8 wq_umem_valid[0x1]; u8 reserved_at_861[0x1f]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_psv_out_bits { @@ -7789,7 +7793,7 @@ struct mlx5_ifc_create_mkey_in_bits { u8 reserved_at_320[0x560]; - u8 klm_pas_mtt[0][0x20]; + u8 klm_pas_mtt[][0x20]; }; enum { @@ -7922,7 +7926,7 @@ struct mlx5_ifc_create_eq_in_bits { u8 reserved_at_3c0[0x4c0]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_dct_out_bits { @@ -7979,7 +7983,7 @@ struct mlx5_ifc_create_cq_in_bits { u8 cq_umem_valid[0x1]; u8 reserved_at_2e1[0x59f]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_config_int_moderation_out_bits { @@ -8335,7 +8339,7 @@ struct mlx5_ifc_access_register_out_bits { u8 reserved_at_40[0x40]; - u8 register_data[0][0x20]; + u8 register_data[][0x20]; }; enum { @@ -8355,7 +8359,7 @@ struct mlx5_ifc_access_register_in_bits { u8 argument[0x20]; - u8 register_data[0][0x20]; + u8 register_data[][0x20]; }; struct mlx5_ifc_sltp_reg_bits { @@ -9372,7 +9376,7 @@ struct mlx5_ifc_cmd_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 command[0][0x20]; + u8 command[][0x20]; }; struct mlx5_ifc_cmd_if_box_bits { @@ -9666,7 +9670,7 @@ struct mlx5_ifc_mcqi_reg_bits { u8 reserved_at_a0[0x10]; u8 data_size[0x10]; - union mlx5_ifc_mcqi_reg_data_bits data[0]; + union mlx5_ifc_mcqi_reg_data_bits data[]; }; struct mlx5_ifc_mcc_reg_bits { @@ -10252,7 +10256,7 @@ struct mlx5_ifc_umem_bits { u8 num_of_mtt[0x40]; - struct mlx5_ifc_mtt_bits mtt[0]; + struct mlx5_ifc_mtt_bits mtt[]; }; struct mlx5_ifc_uctx_bits { @@ -10377,7 +10381,7 @@ struct mlx5_ifc_mtrc_stdb_bits { u8 reserved_at_4[0x4]; u8 read_size[0x18]; u8 start_offset[0x20]; - u8 string_db_data[0]; + u8 string_db_data[]; }; struct mlx5_ifc_mtrc_ctrl_bits { @@ -10431,7 +10435,7 @@ struct mlx5_ifc_query_esw_functions_out_bits { struct mlx5_ifc_host_params_context_bits host_params_context; u8 reserved_at_280[0x180]; - u8 host_sf_enable[0][0x40]; + u8 host_sf_enable[][0x40]; }; struct mlx5_ifc_sf_partition_bits { @@ -10451,7 +10455,7 @@ struct mlx5_ifc_query_sf_partitions_out_bits { u8 reserved_at_60[0x20]; - struct mlx5_ifc_sf_partition_bits sf_partition[0]; + struct mlx5_ifc_sf_partition_bits sf_partition[]; }; struct mlx5_ifc_query_sf_partitions_in_bits { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f23eb18526fe..1af5e460b5f6 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -408,7 +408,7 @@ struct mlx5_wqe_signature_seg { struct mlx5_wqe_inline_seg { __be32 byte_count; - __be32 data[0]; + __be32 data[]; }; enum mlx5_sig_type { diff --git a/include/linux/net.h b/include/linux/net.h index 6451425e828f..e10f378194a5 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -303,10 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); int kernel_getsockname(struct socket *sock, struct sockaddr *addr); int kernel_getpeername(struct socket *sock, struct sockaddr *addr); -int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, - int *optlen); -int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, - unsigned int optlen); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bf44e85d709d..9aac824c523c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -497,4 +497,13 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); +void tcp_sock_set_cork(struct sock *sk, bool on); +int tcp_sock_set_keepcnt(struct sock *sk, int val); +int tcp_sock_set_keepidle(struct sock *sk, int val); +int tcp_sock_set_keepintvl(struct sock *sk, int val); +void tcp_sock_set_nodelay(struct sock *sk); +void tcp_sock_set_quickack(struct sock *sk, int val); +int tcp_sock_set_syncnt(struct sock *sk, int val); +void tcp_sock_set_user_timeout(struct sock *sk, u32 val); + #endif /* _LINUX_TCP_H */ diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index ab988940bf04..91eacbdcf33d 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -72,4 +72,6 @@ bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, unsigned long); +int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val); + #endif /* _NET_RXRPC_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 5b317c9f4470..04ebe7bf54c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -765,4 +765,10 @@ static inline bool inetdev_valid_mtu(unsigned int mtu) return likely(mtu >= IPV4_MIN_MTU); } +void ip_sock_set_freebind(struct sock *sk); +int ip_sock_set_mtu_discover(struct sock *sk, int val); +void ip_sock_set_pktinfo(struct sock *sk); +void ip_sock_set_recverr(struct sock *sk); +void ip_sock_set_tos(struct sock *sk, int val); + #endif /* _IP_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 39a00d3ef5e2..5e65bf2fd32d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1177,4 +1177,96 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); + +static inline int ip6_sock_set_v6only(struct sock *sk) +{ + if (inet_sk(sk)->inet_num) + return -EINVAL; + lock_sock(sk); + sk->sk_ipv6only = true; + release_sock(sk); + return 0; +} + +static inline void ip6_sock_set_recverr(struct sock *sk) +{ + lock_sock(sk); + inet6_sk(sk)->recverr = true; + release_sock(sk); +} + +static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) +{ + unsigned int pref = 0; + unsigned int prefmask = ~0; + + /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ + switch (val & (IPV6_PREFER_SRC_PUBLIC | + IPV6_PREFER_SRC_TMP | + IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { + case IPV6_PREFER_SRC_PUBLIC: + pref |= IPV6_PREFER_SRC_PUBLIC; + prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | + IPV6_PREFER_SRC_TMP); + break; + case IPV6_PREFER_SRC_TMP: + pref |= IPV6_PREFER_SRC_TMP; + prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | + IPV6_PREFER_SRC_TMP); + break; + case IPV6_PREFER_SRC_PUBTMP_DEFAULT: + prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | + IPV6_PREFER_SRC_TMP); + break; + case 0: + break; + default: + return -EINVAL; + } + + /* check HOME/COA conflicts */ + switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) { + case IPV6_PREFER_SRC_HOME: + prefmask &= ~IPV6_PREFER_SRC_COA; + break; + case IPV6_PREFER_SRC_COA: + pref |= IPV6_PREFER_SRC_COA; + break; + case 0: + break; + default: + return -EINVAL; + } + + /* check CGA/NONCGA conflicts */ + switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { + case IPV6_PREFER_SRC_CGA: + case IPV6_PREFER_SRC_NONCGA: + case 0: + break; + default: + return -EINVAL; + } + + inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref; + return 0; +} + +static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val) +{ + int ret; + + lock_sock(sk); + ret = __ip6_sock_set_addr_preferences(sk, val); + release_sock(sk); + return ret; +} + +static inline void ip6_sock_set_recvpktinfo(struct sock *sk) +{ + lock_sock(sk); + inet6_sk(sk)->rxopt.bits.rxinfo = true; + release_sock(sk); +} + #endif /* _NET_IPV6_H */ diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 6e622dd3122e..d7a7f7c81e7b 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -56,6 +56,12 @@ struct ipv6_stub { void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); +#if IS_ENABLED(CONFIG_XFRM) + void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); + int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); + int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); +#endif struct neigh_table *nd_tbl; }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/mpls.h b/include/net/mpls.h index ccaf238e8ea7..0bb7944e7b08 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -8,6 +8,7 @@ #include <linux/if_ether.h> #include <linux/netdevice.h> +#include <linux/mpls.h> #define MPLS_HLEN 4 @@ -25,4 +26,20 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); } + +static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, + unsigned int ttl, + unsigned int tc, + bool bos) +{ + struct mpls_shim_hdr result; + + result.label_stack_entry = + cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) | + (tc << MPLS_LS_TC_SHIFT) | + (bos ? (1 << MPLS_LS_S_SHIFT) : 0) | + (ttl << MPLS_LS_TTL_SHIFT)); + return result; +} + #endif diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3ab5c6bbb90b..f8bcb75bb044 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -615,4 +615,11 @@ static inline bool sctp_newsk_ready(const struct sock *sk) return sock_flag(sk, SOCK_DEAD) || sk->sk_socket; } +static inline void sctp_sock_set_nodelay(struct sock *sk) +{ + lock_sock(sk); + sctp_sk(sk)->nodelay = true; + release_sock(sk); +} + #endif /* __net_sctp_h__ */ diff --git a/include/net/sock.h b/include/net/sock.h index 3e8c6d4b4b59..6e9f713a7860 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1156,7 +1156,9 @@ struct proto { int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, - struct sockaddr *uaddr, int addr_len); + struct sockaddr *addr, int addr_len); + int (*bind_add)(struct sock *sk, + struct sockaddr *addr, int addr_len); int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); @@ -2688,4 +2690,16 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) void sock_def_readable(struct sock *sk); +int sock_bindtoindex(struct sock *sk, int ifindex); +void sock_enable_timestamps(struct sock *sk); +void sock_no_linger(struct sock *sk); +void sock_set_keepalive(struct sock *sk); +void sock_set_priority(struct sock *sk, u32 priority); +void sock_set_rcvbuf(struct sock *sk, int val); +void sock_set_reuseaddr(struct sock *sk); +void sock_set_reuseport(struct sock *sk); +void sock_set_sndtimeo(struct sock *sk, s64 secs); + +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); + #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b681338a8320..bca761ffa25f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -437,6 +437,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); void tcp_req_err(struct sock *sk, u32 seq, bool abort); +void tcp_ld_RTO_revert(struct sock *sk, u32 seq); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, @@ -661,7 +662,6 @@ void tcp_initialize_rcv_mss(struct sock *sk); int tcp_mtu_to_mss(struct sock *sk, int pmtu); int tcp_mss_to_mtu(struct sock *sk, int mss); void tcp_mtup_init(struct sock *sk); -void tcp_init_buffer_space(struct sock *sk); static inline void tcp_bound_rto(const struct sock *sk) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8f71c111e65a..094fe682f5d7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -361,11 +361,6 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_dstopts; int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); - int (*output_finish)(struct sock *sk, struct sk_buff *skb); - int (*extract_input)(struct xfrm_state *x, - struct sk_buff *skb); - int (*extract_output)(struct xfrm_state *x, - struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); void (*local_error)(struct sk_buff *skb, u32 mtu); @@ -1406,6 +1401,8 @@ struct xfrm4_protocol { struct xfrm6_protocol { int (*handler)(struct sk_buff *skb); + int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); @@ -1562,7 +1559,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); @@ -1578,7 +1574,6 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) return xfrm_input(skb, nexthdr, spi, 0); } -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); @@ -1586,10 +1581,11 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int xfrm6_transport_finish(struct sk_buff *skb, int async); int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); @@ -1602,14 +1598,15 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen); #else @@ -1992,4 +1989,20 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, return 0; } + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool xfrm6_local_dontfrag(const struct sock *sk) +{ + int proto; + + if (!sk || sk->sk_family != AF_INET6) + return false; + + proto = sk->sk_protocol; + if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) + return inet6_sk(sk)->dontfrag; + + return false; +} +#endif #endif /* _NET_XFRM_H */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f8ca5edc5f2c..27d6ab11f9ee 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { - int optval = 1; - - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&optval, sizeof(optval)); - if (ret) - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", - ret); - } + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) + tcp_sock_set_nodelay(sock->sk); con->sock = sock; return 0; diff --git a/net/core/sock.c b/net/core/sock.c index fd85e651ce28..61ec573221a6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_setbindtodevice_locked(struct sock *sk, int ifindex) +static int sock_bindtoindex_locked(struct sock *sk, int ifindex) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -594,6 +594,18 @@ out: return ret; } +int sock_bindtoindex(struct sock *sk, int ifindex) +{ + int ret; + + lock_sock(sk); + ret = sock_bindtoindex_locked(sk, ifindex); + release_sock(sk); + + return ret; +} +EXPORT_SYMBOL(sock_bindtoindex); + static int sock_setbindtodevice(struct sock *sk, char __user *optval, int optlen) { @@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, goto out; } - lock_sock(sk); - ret = sock_setbindtodevice_locked(sk, index); - release_sock(sk); - + return sock_bindtoindex(sk, index); out: #endif @@ -712,6 +721,111 @@ bool sk_mc_loop(struct sock *sk) } EXPORT_SYMBOL(sk_mc_loop); +void sock_set_reuseaddr(struct sock *sk) +{ + lock_sock(sk); + sk->sk_reuse = SK_CAN_REUSE; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_reuseaddr); + +void sock_set_reuseport(struct sock *sk) +{ + lock_sock(sk); + sk->sk_reuseport = true; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_reuseport); + +void sock_no_linger(struct sock *sk) +{ + lock_sock(sk); + sk->sk_lingertime = 0; + sock_set_flag(sk, SOCK_LINGER); + release_sock(sk); +} +EXPORT_SYMBOL(sock_no_linger); + +void sock_set_priority(struct sock *sk, u32 priority) +{ + lock_sock(sk); + sk->sk_priority = priority; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_priority); + +void sock_set_sndtimeo(struct sock *sk, s64 secs) +{ + lock_sock(sk); + if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1) + sk->sk_sndtimeo = secs * HZ; + else + sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_sndtimeo); + +static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) +{ + if (val) { + sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new); + sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns); + sock_set_flag(sk, SOCK_RCVTSTAMP); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); + } else { + sock_reset_flag(sk, SOCK_RCVTSTAMP); + sock_reset_flag(sk, SOCK_RCVTSTAMPNS); + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + } +} + +void sock_enable_timestamps(struct sock *sk) +{ + lock_sock(sk); + __sock_set_timestamps(sk, true, false, true); + release_sock(sk); +} +EXPORT_SYMBOL(sock_enable_timestamps); + +void sock_set_keepalive(struct sock *sk) +{ + lock_sock(sk); + if (sk->sk_prot->keepalive) + sk->sk_prot->keepalive(sk, true); + sock_valbool_flag(sk, SOCK_KEEPOPEN, true); + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_keepalive); + +static void __sock_set_rcvbuf(struct sock *sk, int val) +{ + /* Ensure val * 2 fits into an int, to prevent max_t() from treating it + * as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + + /* We double it on the way in to account for "struct sk_buff" etc. + * overhead. Applications assume that the SO_RCVBUF setting they make + * will allow that much actual data to be received on that socket. + * + * Applications are unaware that "struct sk_buff" and other overheads + * allocate from the receive buffer during socket buffer allocation. + * + * And after considering the possible alternatives, returning the value + * we actually used in getsockopt is the most desirable behavior. + */ + WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); +} + +void sock_set_rcvbuf(struct sock *sk, int val) +{ + lock_sock(sk); + __sock_set_rcvbuf(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_rcvbuf); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -808,30 +922,7 @@ set_sndbuf: * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ - val = min_t(u32, val, sysctl_rmem_max); -set_rcvbuf: - /* Ensure val * 2 fits into an int, to prevent max_t() - * from treating it as a negative value. - */ - val = min_t(int, val, INT_MAX / 2); - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - /* - * We double it on the way in to account for - * "struct sk_buff" etc. overhead. Applications - * assume that the SO_RCVBUF setting they make will - * allow that much actual data to be received on that - * socket. - * - * Applications are unaware that "struct sk_buff" and - * other overheads allocate from the receive buffer - * during socket buffer allocation. - * - * And after considering the possible alternatives, - * returning the value we actually used in getsockopt - * is the most desirable behavior. - */ - WRITE_ONCE(sk->sk_rcvbuf, - max_t(int, val * 2, SOCK_MIN_RCVBUF)); + __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max)); break; case SO_RCVBUFFORCE: @@ -843,9 +934,8 @@ set_rcvbuf: /* No negative values (to prevent underflow, as val will be * multiplied by 2). */ - if (val < 0) - val = 0; - goto set_rcvbuf; + __sock_set_rcvbuf(sk, max(val, 0)); + break; case SO_KEEPALIVE: if (sk->sk_prot->keepalive) @@ -903,28 +993,17 @@ set_rcvbuf: break; case SO_TIMESTAMP_OLD: + __sock_set_timestamps(sk, valbool, false, false); + break; case SO_TIMESTAMP_NEW: + __sock_set_timestamps(sk, valbool, true, false); + break; case SO_TIMESTAMPNS_OLD: + __sock_set_timestamps(sk, valbool, false, true); + break; case SO_TIMESTAMPNS_NEW: - if (valbool) { - if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW) - sock_set_flag(sk, SOCK_TSTAMP_NEW); - else - sock_reset_flag(sk, SOCK_TSTAMP_NEW); - - if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW) - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); - else - sock_set_flag(sk, SOCK_RCVTSTAMPNS); - sock_set_flag(sk, SOCK_RCVTSTAMP); - sock_enable_timestamp(sk, SOCK_TIMESTAMP); - } else { - sock_reset_flag(sk, SOCK_RCVTSTAMP); - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); - sock_reset_flag(sk, SOCK_TSTAMP_NEW); - } + __sock_set_timestamps(sk, valbool, true, true); break; - case SO_TIMESTAMPING_NEW: sock_set_flag(sk, SOCK_TSTAMP_NEW); /* fall through */ @@ -1180,7 +1259,7 @@ set_rcvbuf: break; case SO_BINDTOIFINDEX: - ret = sock_setbindtodevice_locked(sk, val); + ret = sock_bindtoindex_locked(sk, val); break; default: @@ -3633,3 +3712,11 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ + +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) +{ + if (!sk->sk_prot->bind_add) + return -EOPNOTSUPP; + return sk->sk_prot->bind_add(sk, addr, addr_len); +} +EXPORT_SYMBOL(sock_bind_add); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3052da668156..780b2a15ac9b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -140,34 +140,6 @@ bool vid_is_dsa_8021q(u16 vid) } EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); -static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) -{ - struct bridge_vlan_info vinfo; - struct net_device *slave; - u16 pvid; - int err; - - if (!dsa_is_user_port(ds, port)) - return 0; - - slave = dsa_to_port(ds, port)->slave; - - err = br_vlan_get_pvid(slave, &pvid); - if (!pvid || err < 0) - /* There is no pvid on the bridge for this port, which is - * perfectly valid. Nothing to restore, bye-bye! - */ - return 0; - - err = br_vlan_get_info(slave, pvid, &vinfo); - if (err < 0) { - dev_err(ds->dev, "Couldn't determine PVID attributes\n"); - return err; - } - - return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags); -} - /* If @enabled is true, installs @vid with @flags into the switch port's HW * filter. * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the @@ -178,39 +150,11 @@ static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, u16 flags, bool enabled) { struct dsa_port *dp = dsa_to_port(ds, port); - struct bridge_vlan_info vinfo; - int err; if (enabled) return dsa_port_vid_add(dp, vid, flags); - err = dsa_port_vid_del(dp, vid); - if (err < 0) - return err; - - /* Nothing to restore from the bridge for a non-user port. - * The CPU port VLANs are restored implicitly with the user ports, - * similar to how the bridge does in dsa_slave_vlan_add and - * dsa_slave_vlan_del. - */ - if (!dsa_is_user_port(ds, port)) - return 0; - - err = br_vlan_get_info(dp->slave, vid, &vinfo); - /* Couldn't determine bridge attributes for this vid, - * it means the bridge had not configured it. - */ - if (err < 0) - return 0; - - /* Restore the VID from the bridge */ - err = dsa_port_vid_add(dp, vid, vinfo.flags); - if (err < 0) - return err; - - vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID; - - return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags); + return dsa_port_vid_del(dp, vid); } /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single @@ -329,9 +273,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) return err; } - if (!enabled) - err = dsa_8021q_restore_pvid(ds, port); - return err; } EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 9991688d7d1d..7b7a0456c15c 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -227,9 +227,9 @@ cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = { }; /* CABLE_TEST_TDR_ACT */ -int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, - struct genl_info *info, - struct phy_tdr_config *cfg) +static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, + struct genl_info *info, + struct phy_tdr_config *cfg) { struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1]; int ret; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5da4733067fb..23ba5045e3d3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -384,6 +384,7 @@ config INET_ESPINTCP depends on XFRM && INET_ESP select STREAM_PARSER select NET_SOCK_MSG + select XFRM_ESPINTCP help Support for RFC 8229 encapsulation of ESP and IKE over TCP/IPv4 sockets. diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f43d5f12aa86..84ec3703c909 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -560,6 +560,61 @@ out: return err; } +static void __ip_sock_set_tos(struct sock *sk, int val) +{ + if (sk->sk_type == SOCK_STREAM) { + val &= ~INET_ECN_MASK; + val |= inet_sk(sk)->tos & INET_ECN_MASK; + } + if (inet_sk(sk)->tos != val) { + inet_sk(sk)->tos = val; + sk->sk_priority = rt_tos2priority(val); + sk_dst_reset(sk); + } +} + +void ip_sock_set_tos(struct sock *sk, int val) +{ + lock_sock(sk); + __ip_sock_set_tos(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_tos); + +void ip_sock_set_freebind(struct sock *sk) +{ + lock_sock(sk); + inet_sk(sk)->freebind = true; + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_freebind); + +void ip_sock_set_recverr(struct sock *sk) +{ + lock_sock(sk); + inet_sk(sk)->recverr = true; + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_recverr); + +int ip_sock_set_mtu_discover(struct sock *sk, int val) +{ + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) + return -EINVAL; + lock_sock(sk); + inet_sk(sk)->pmtudisc = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(ip_sock_set_mtu_discover); + +void ip_sock_set_pktinfo(struct sock *sk) +{ + lock_sock(sk); + inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO; + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_pktinfo); /* * Socket option code for IP. This is the end of the line after any @@ -823,15 +878,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; break; case IP_TOS: /* This sets both TOS and Precedence */ - if (sk->sk_type == SOCK_STREAM) { - val &= ~INET_ECN_MASK; - val |= inet->tos & INET_ECN_MASK; - } - if (inet->tos != val) { - inet->tos = val; - sk->sk_priority = rt_tos2priority(val); - sk_dst_reset(sk); - } + __ip_sock_set_tos(sk, val); break; case IP_TTL: if (optlen < 1) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 970064996377..15d47d5e7951 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2801,6 +2801,163 @@ static void tcp_enable_tx_delay(void) } } +/* When set indicates to always queue non-full frames. Later the user clears + * this option and we transmit any pending partial frames in the queue. This is + * meant to be used alongside sendfile() to get properly filled frames when the + * user (for example) must write out headers with a write() call first and then + * use sendfile to send out the data parts. + * + * TCP_CORK can be set together with TCP_NODELAY and it is stronger than + * TCP_NODELAY. + */ +static void __tcp_sock_set_cork(struct sock *sk, bool on) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (on) { + tp->nonagle |= TCP_NAGLE_CORK; + } else { + tp->nonagle &= ~TCP_NAGLE_CORK; + if (tp->nonagle & TCP_NAGLE_OFF) + tp->nonagle |= TCP_NAGLE_PUSH; + tcp_push_pending_frames(sk); + } +} + +void tcp_sock_set_cork(struct sock *sk, bool on) +{ + lock_sock(sk); + __tcp_sock_set_cork(sk, on); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_cork); + +/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is + * remembered, but it is not activated until cork is cleared. + * + * However, when TCP_NODELAY is set we make an explicit push, which overrides + * even TCP_CORK for currently queued segments. + */ +static void __tcp_sock_set_nodelay(struct sock *sk, bool on) +{ + if (on) { + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; + tcp_push_pending_frames(sk); + } else { + tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF; + } +} + +void tcp_sock_set_nodelay(struct sock *sk) +{ + lock_sock(sk); + __tcp_sock_set_nodelay(sk, true); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_nodelay); + +static void __tcp_sock_set_quickack(struct sock *sk, int val) +{ + if (!val) { + inet_csk_enter_pingpong_mode(sk); + return; + } + + inet_csk_exit_pingpong_mode(sk); + if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + inet_csk_ack_scheduled(sk)) { + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED; + tcp_cleanup_rbuf(sk, 1); + if (!(val & 1)) + inet_csk_enter_pingpong_mode(sk); + } +} + +void tcp_sock_set_quickack(struct sock *sk, int val) +{ + lock_sock(sk); + __tcp_sock_set_quickack(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_quickack); + +int tcp_sock_set_syncnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_SYNCNT) + return -EINVAL; + + lock_sock(sk); + inet_csk(sk)->icsk_syn_retries = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_syncnt); + +void tcp_sock_set_user_timeout(struct sock *sk, u32 val) +{ + lock_sock(sk); + inet_csk(sk)->icsk_user_timeout = val; + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_user_timeout); + +static int __tcp_sock_set_keepidle(struct sock *sk, int val) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (val < 1 || val > MAX_TCP_KEEPIDLE) + return -EINVAL; + + tp->keepalive_time = val * HZ; + if (sock_flag(sk, SOCK_KEEPOPEN) && + !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + u32 elapsed = keepalive_time_elapsed(tp); + + if (tp->keepalive_time > elapsed) + elapsed = tp->keepalive_time - elapsed; + else + elapsed = 0; + inet_csk_reset_keepalive_timer(sk, elapsed); + } + + return 0; +} + +int tcp_sock_set_keepidle(struct sock *sk, int val) +{ + int err; + + lock_sock(sk); + err = __tcp_sock_set_keepidle(sk, val); + release_sock(sk); + return err; +} +EXPORT_SYMBOL(tcp_sock_set_keepidle); + +int tcp_sock_set_keepintvl(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPINTVL) + return -EINVAL; + + lock_sock(sk); + tcp_sk(sk)->keepalive_intvl = val * HZ; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_keepintvl); + +int tcp_sock_set_keepcnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPCNT) + return -EINVAL; + + lock_sock(sk); + tcp_sk(sk)->keepalive_probes = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_keepcnt); + /* * Socket option code for TCP. */ @@ -2898,20 +3055,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_NODELAY: - if (val) { - /* TCP_NODELAY is weaker than TCP_CORK, so that - * this option on corked socket is remembered, but - * it is not activated until cork is cleared. - * - * However, when TCP_NODELAY is set we make - * an explicit push, which overrides even TCP_CORK - * for currently queued segments. - */ - tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk); - } else { - tp->nonagle &= ~TCP_NAGLE_OFF; - } + __tcp_sock_set_nodelay(sk, val); break; case TCP_THIN_LINEAR_TIMEOUTS: @@ -2979,43 +3123,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_CORK: - /* When set indicates to always queue non-full frames. - * Later the user clears this option and we transmit - * any pending partial frames in the queue. This is - * meant to be used alongside sendfile() to get properly - * filled frames when the user (for example) must write - * out headers with a write() call first and then use - * sendfile to send out the data parts. - * - * TCP_CORK can be set together with TCP_NODELAY and it is - * stronger than TCP_NODELAY. - */ - if (val) { - tp->nonagle |= TCP_NAGLE_CORK; - } else { - tp->nonagle &= ~TCP_NAGLE_CORK; - if (tp->nonagle&TCP_NAGLE_OFF) - tp->nonagle |= TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk); - } + __tcp_sock_set_cork(sk, val); break; case TCP_KEEPIDLE: - if (val < 1 || val > MAX_TCP_KEEPIDLE) - err = -EINVAL; - else { - tp->keepalive_time = val * HZ; - if (sock_flag(sk, SOCK_KEEPOPEN) && - !((1 << sk->sk_state) & - (TCPF_CLOSE | TCPF_LISTEN))) { - u32 elapsed = keepalive_time_elapsed(tp); - if (tp->keepalive_time > elapsed) - elapsed = tp->keepalive_time - elapsed; - else - elapsed = 0; - inet_csk_reset_keepalive_timer(sk, elapsed); - } - } + err = __tcp_sock_set_keepidle(sk, val); break; case TCP_KEEPINTVL: if (val < 1 || val > MAX_TCP_KEEPINTVL) @@ -3072,19 +3184,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_QUICKACK: - if (!val) { - inet_csk_enter_pingpong_mode(sk); - } else { - inet_csk_exit_pingpong_mode(sk); - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - inet_csk_ack_scheduled(sk)) { - icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; - tcp_cleanup_rbuf(sk, 1); - if (!(val & 1)) - inet_csk_enter_pingpong_mode(sk); - } - } + __tcp_sock_set_quickack(sk, val); break; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ad90102f5dfb..83330a6cb242 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -437,7 +437,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* 3. Try to fixup all. It is made immediately after connection enters * established state. */ -void tcp_init_buffer_space(struct sock *sk) +static void tcp_init_buffer_space(struct sock *sk) { int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4eef5b84fff1..ad6435ba6d72 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -404,7 +404,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) EXPORT_SYMBOL(tcp_req_err); /* TCP-LD (RFC 6069) logic */ -static void tcp_ld_RTO_revert(struct sock *sk, u32 seq) +void tcp_ld_RTO_revert(struct sock *sk, u32 seq) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -441,6 +441,7 @@ static void tcp_ld_RTO_revert(struct sock *sk, u32 seq) tcp_retransmit_timer(sk); } } +EXPORT_SYMBOL(tcp_ld_RTO_revert); /* * This routine is called by the ICMP module when it gets some diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32564b350823..1b7ebbcae497 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -112,6 +112,9 @@ #include <net/sock_reuseport.h> #include <net/addrconf.h> #include <net/udp_tunnel.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6_stubs.h> +#endif struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -2563,7 +2566,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: - up->encap_rcv = xfrm4_udp_encap_rcv; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + else +#endif + up->encap_rcv = xfrm4_udp_encap_rcv; #endif fallthrough; case UDP_ENCAP_L2TPINUDP: diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 150e6f0fdbf5..2158e8bddf41 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->bind_ifindex) { - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, - (void *)&cfg->bind_ifindex, - sizeof(cfg->bind_ifindex)); + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex); if (err < 0) goto error; } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f8de2482a529..ad2afeef4f10 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -18,11 +18,6 @@ #include <net/ip.h> #include <net/xfrm.h> -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm4_extract_header(skb); -} - static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 30ddb9dc9398..3cff51ba72bb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -14,77 +14,18 @@ #include <net/xfrm.h> #include <net/icmp.h> -static int xfrm4_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - - if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) - goto out; - - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) - goto out; - - mtu = dst_mtu(skb_dst(skb)); - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { - skb->protocol = htons(ETH_P_IP); - - if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm4_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; - - return xfrm4_extract_header(skb); -} - -int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - - IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; - - return xfrm_output(sk, skb); -} - static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_NETFILTER struct xfrm_state *x = skb_dst(skb)->xfrm; - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; -#ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(net, sk, skb); } #endif - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; + return xfrm_output(sk, skb); } int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index f8ed3c3bb928..87d4db591488 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -8,36 +8,12 @@ * */ -#include <net/ip.h> #include <net/xfrm.h> -#include <linux/pfkeyv2.h> -#include <linux/ipsec.h> -#include <linux/netfilter_ipv4.h> -#include <linux/export.h> - -int xfrm4_extract_header(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = iph->id; - XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; - XFRM_MODE_SKB_CB(skb)->tos = iph->tos; - XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; - XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); - memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .proto = IPPROTO_IPIP, .output = xfrm4_output, - .output_finish = xfrm4_output_finish, - .extract_input = xfrm4_extract_input, - .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, .local_error = xfrm4_local_error, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5a6111da26c4..4f03aece2980 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD If unsure, say N. +config INET6_ESPINTCP + bool "IPv6: ESP in TCP encapsulation (RFC 8229)" + depends on XFRM && INET6_ESP + select STREAM_PARSER + select NET_SOCK_MSG + select XFRM_ESPINTCP + help + Support for RFC 8229 encapsulation of ESP and IKE over + TCP/IPv6 sockets. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0625a97a8894..b304b882e031 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include <net/seg6.h> #include <net/rpl.h> #include <net/compat.h> +#include <net/xfrm.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -1023,6 +1024,11 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, +#if IS_ENABLED(CONFIG_XFRM) + .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, + .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_rcv_encap = xfrm6_rcv_encap, +#endif .nd_tbl = &nd_tbl, }; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 45e2adc56610..d88d97617f7e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -767,6 +767,7 @@ static const struct xfrm_type ah6_type = { static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, .priority = 0, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 11143d039f16..c43592771126 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -26,11 +26,16 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> +#include <net/udp.h> #include <linux/icmpv6.h> +#include <net/tcp.h> +#include <net/espintcp.h> +#include <net/inet6_hashtables.h> #include <linux/highmem.h> @@ -39,6 +44,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) /* @@ -72,9 +82,9 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) @@ -104,16 +114,17 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, static void esp_ssg_unref(struct xfrm_state *x, void *tmp) { + struct esp_output_extra *extra = esp_tmp_extra(tmp); struct crypto_aead *aead = x->data; - int seqhilen = 0; + int extralen = 0; u8 *iv; struct aead_request *req; struct scatterlist *sg; if (x->props.flags & XFRM_STATE_ESN) - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); - iv = esp_tmp_iv(aead, tmp, seqhilen); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); /* Unref skb_frag_pages in the src scatterlist if necessary. @@ -124,6 +135,149 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +#ifdef CONFIG_INET6_ESPINTCP +struct esp_tcp_sk { + struct sock *sk; + struct rcu_head rcu; +}; + +static void esp_free_tcp_sk(struct rcu_head *head) +{ + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); + + sock_put(esk->sk); + kfree(esk); +} + +static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct esp_tcp_sk *esk; + __be16 sport, dport; + struct sock *nsk; + struct sock *sk; + + sk = rcu_dereference(x->encap_sk); + if (sk && sk->sk_state == TCP_ESTABLISHED) + return sk; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (sk && sk == nsk) { + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); + if (!esk) { + spin_unlock_bh(&x->lock); + return ERR_PTR(-ENOMEM); + } + RCU_INIT_POINTER(x->encap_sk, NULL); + esk->sk = sk; + call_rcu(&esk->rcu, esp_free_tcp_sk); + } + spin_unlock_bh(&x->lock); + + sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6, + dport, &x->props.saddr.in6, ntohs(sport), 0, 0); + if (!sk) + return ERR_PTR(-ENOENT); + + if (!tcp_is_ulp_esp(sk)) { + sock_put(sk); + return ERR_PTR(-EINVAL); + } + + spin_lock_bh(&x->lock); + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (encap->encap_sport != sport || + encap->encap_dport != dport) { + sock_put(sk); + sk = nsk ?: ERR_PTR(-EREMCHG); + } else if (sk == nsk) { + sock_put(sk); + } else { + rcu_assign_pointer(x->encap_sk, sk); + } + spin_unlock_bh(&x->lock); + + return sk; +} + +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) +{ + struct sock *sk; + int err; + + rcu_read_lock(); + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); + if (err) + goto out; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + err = espintcp_queue_out(sk, skb); + else + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + +out: + rcu_read_unlock(); + return err; +} + +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + return esp_output_tcp_finish(x, skb); +} + +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + local_bh_disable(); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + local_bh_enable(); + + /* EINPROGRESS just happens to do the right thing. It + * actually means that the skb has been consumed and + * isn't coming back. + */ + return err ?: -EINPROGRESS; +} +#else +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + kfree_skb(skb); + + return -EOPNOTSUPP; +} +#endif + +static void esp_output_encap_csum(struct sk_buff *skb) +{ + /* UDP encap with IPv6 requires a valid checksum */ + if (*skb_mac_header(skb) == IPPROTO_UDP) { + struct udphdr *uh = udp_hdr(skb); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int len = ntohs(uh->len); + unsigned int offset = skb_transport_offset(skb); + __wsum csum = skb_checksum(skb, offset, skb->len - offset, 0); + + uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -143,6 +297,8 @@ static void esp_output_done(struct crypto_async_request *base, int err) esp_ssg_unref(x, tmp); kfree(tmp); + esp_output_encap_csum(skb); + if (xo && (xo->flags & XFRM_DEV_RESUME)) { if (err) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); @@ -154,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err) secpath_reset(skb); xfrm_dev_resume(skb); } else { - xfrm_output_resume(skb, err); + if (!err && + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + esp_output_tail_tcp(x, skb); + else + xfrm_output_resume(skb, err); } } @@ -163,7 +323,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -171,27 +331,36 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, struct xfrm_state *x, struct ip_esp_hdr *esph, - __be32 *seqhi) + struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + __u32 seqhi; struct xfrm_offload *xo = xfrm_offload(skb); - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; if (xo) - esph->seq_no = htonl(xo->seq.hi); + seqhi = xo->seq.hi; else - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + seqhi = XFRM_SKB_CB(skb)->seq.output.hi; + + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; + esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; @@ -207,15 +376,122 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, + int encap_type, + struct esp_info *esp, + __be16 sport, + __be16 dport) +{ + struct udphdr *uh; + __be32 *udpdata32; + unsigned int len; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > U16_MAX) + return ERR_PTR(-EMSGSIZE); + + uh = (struct udphdr *)esp->esph; + uh->source = sport; + uh->dest = dport; + uh->len = htons(len); + uh->check = 0; + + *skb_mac_header(skb) = IPPROTO_UDP; + + if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { + udpdata32 = (__be32 *)(uh + 1); + udpdata32[0] = udpdata32[1] = 0; + return (struct ip_esp_hdr *)(udpdata32 + 2); + } + + return (struct ip_esp_hdr *)(uh + 1); +} + +#ifdef CONFIG_INET6_ESPINTCP +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + __be16 *lenp = (void *)esp->esph; + struct ip_esp_hdr *esph; + unsigned int len; + struct sock *sk; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); + + rcu_read_lock(); + sk = esp6_find_tcp_sk(x); + rcu_read_unlock(); + + if (IS_ERR(sk)) + return ERR_CAST(sk); + + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + + return esph; +} +#else +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + +static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, + struct esp_info *esp) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct ip_esp_hdr *esph; + __be16 sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); + break; + case TCP_ENCAP_ESPINTCP: + esph = esp6_output_tcp_encap(x, skb, esp); + break; + } + + if (IS_ERR(esph)) + return PTR_ERR(esph); + + esp->esph = esph; + + return 0; +} + int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; u8 *vaddr; int nfrags; + int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + if (x->encap) { + int err = esp6_output_encap(x, skb, esp); + + if (err < 0) + return err; + } + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -274,10 +550,13 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); + nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); @@ -295,20 +574,20 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info void *tmp; int ivlen; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; struct page *page; struct ip_esp_hdr *esph; struct aead_request *req; struct crypto_aead *aead; struct scatterlist *sg, *dsg; + struct esp_output_extra *extra; int err = -ENOMEM; assoclen = sizeof(struct ip_esp_hdr); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); assoclen += sizeof(__be32); } @@ -316,12 +595,12 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info alen = crypto_aead_authsize(aead); ivlen = crypto_aead_ivsize(aead); - tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen); + tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen); if (!tmp) goto error; - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -330,7 +609,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info else dsg = &sg[esp->nfrags]; - esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); + esph = esp_output_set_esn(skb, x, esp->esph, extra); + esp->esph = esph; sg_init_table(sg, esp->nfrags); err = skb_to_sgvec(skb, sg, @@ -394,11 +674,15 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info case 0: if ((x->props.flags & XFRM_STATE_ESN)) esp_output_restore_header(skb); + esp_output_encap_csum(skb); } if (sg != dsg) esp_ssg_unref(x, tmp); + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + err = esp_output_tail_tcp(x, skb); + error_free: kfree(tmp); error: @@ -438,11 +722,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; + esp.esph = ip_esp_hdr(skb); + esp.nfrags = esp6_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; - esph = ip_esp_hdr(skb); + esph = esp.esph; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); @@ -517,6 +803,60 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (unlikely(err < 0)) goto out; + if (x->encap) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct xfrm_encap_tmpl *encap = x->encap; + struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); + struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); + __be16 source; + + switch (x->encap->encap_type) { + case TCP_ENCAP_ESPINTCP: + source = th->source; + break; + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + source = uh->source; + break; + default: + WARN_ON_ONCE(1); + err = -EINVAL; + goto out; + } + + /* + * 1) if the NAT-T peer's IP or port changed then + * advertize the change to the keying daemon. + * This is an inbound SA, so just compare + * SRC ports. + */ + if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) || + source != encap->encap_sport) { + xfrm_address_t ipaddr; + + memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6)); + km_new_mapping(x, &ipaddr, source); + + /* XXX: perhaps add an extra + * policy check here, to see + * if we should allow or + * reject a packet from a + * different source + * address/port. + */ + } + + /* + * 2) ignore UDP/TCP checksums in case + * of NAT-T in Transport Mode, or + * perform other post-processing fixes + * as per draft-ietf-ipsec-udp-encaps-06, + * section 3.1.2 + */ + if (x->props.mode == XFRM_MODE_TRANSPORT) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); @@ -632,7 +972,7 @@ skip_cow: goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -836,9 +1176,6 @@ static int esp6_init_state(struct xfrm_state *x) u32 align; int err; - if (x->encap) - return -EINVAL; - x->data = NULL; if (x->aead) @@ -867,6 +1204,30 @@ static int esp6_init_state(struct xfrm_state *x) break; } + if (x->encap) { + struct xfrm_encap_tmpl *encap = x->encap; + + switch (encap->encap_type) { + default: + err = -EINVAL; + goto error; + case UDP_ENCAP_ESPINUDP: + x->props.header_len += sizeof(struct udphdr); + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); + break; +#ifdef CONFIG_INET6_ESPINTCP + case TCP_ENCAP_ESPINTCP: + /* only the length field, TCP encap is done by + * the socket + */ + x->props.header_len += 2; + break; +#endif + } + } + align = ALIGN(crypto_aead_blocksize(aead), 4); x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); @@ -893,6 +1254,7 @@ static const struct xfrm_type esp6_type = { static struct xfrm6_protocol esp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, .priority = 0, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 8eab2c869d61..06163cc15844 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -271,7 +271,6 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features int alen; int blksize; struct xfrm_offload *xo; - struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; @@ -312,13 +311,13 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features seq = xo->seq.low; - esph = ip_esp_hdr(skb); - esph->spi = x->id.spi; + esp.esph = ip_esp_hdr(skb); + esp.esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(seq); + esp.esph->seq_no = htonl(seq); if (!skb_is_gso(skb)) xo->seq.low++; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 58956a6b66a2..2e0ad1bc84a8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -25,17 +25,12 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->ipv6_v6only) { - int val = 1; - - err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, - (char *) &val, sizeof(val)); + err = ip6_sock_set_v6only(sock->sk); if (err < 0) goto error; } if (cfg->bind_ifindex) { - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, - (void *)&cfg->bind_ifindex, - sizeof(cfg->bind_ifindex)); + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex); if (err < 0) goto error; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index cc6180e08a4f..1147f647b9a0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -296,7 +296,8 @@ static void vti6_dev_uninit(struct net_device *dev) dev_put(dev); } -static int vti6_rcv(struct sk_buff *skb) +static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -323,7 +324,10 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_unlock(); - return xfrm6_rcv_tnl(skb, t); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + return xfrm_input(skb, nexthdr, spi, encap_type); } rcu_read_unlock(); return -EINVAL; @@ -332,6 +336,13 @@ discard: return 0; } +static int vti6_rcv(struct sk_buff *skb) +{ + int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff]; + + return vti6_input_proto(skb, nexthdr, 0, 0); +} + static int vti6_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -1185,6 +1196,7 @@ static struct pernet_operations vti6_net_ops = { static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1192,6 +1204,7 @@ static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1199,6 +1212,7 @@ static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3752bd3e92ce..99668bfebd85 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -183,6 +183,7 @@ static const struct xfrm_type ipcomp6_type = { static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e10258c2210e..adbfed6adf11 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -845,67 +845,10 @@ done: break; case IPV6_ADDR_PREFERENCES: - { - unsigned int pref = 0; - unsigned int prefmask = ~0; - if (optlen < sizeof(int)) goto e_inval; - - retv = -EINVAL; - - /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ - switch (val & (IPV6_PREFER_SRC_PUBLIC| - IPV6_PREFER_SRC_TMP| - IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { - case IPV6_PREFER_SRC_PUBLIC: - pref |= IPV6_PREFER_SRC_PUBLIC; - break; - case IPV6_PREFER_SRC_TMP: - pref |= IPV6_PREFER_SRC_TMP; - break; - case IPV6_PREFER_SRC_PUBTMP_DEFAULT: - break; - case 0: - goto pref_skip_pubtmp; - default: - goto e_inval; - } - - prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| - IPV6_PREFER_SRC_TMP); -pref_skip_pubtmp: - - /* check HOME/COA conflicts */ - switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { - case IPV6_PREFER_SRC_HOME: - break; - case IPV6_PREFER_SRC_COA: - pref |= IPV6_PREFER_SRC_COA; - case 0: - goto pref_skip_coa; - default: - goto e_inval; - } - - prefmask &= ~IPV6_PREFER_SRC_COA; -pref_skip_coa: - - /* check CGA/NONCGA conflicts */ - switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { - case IPV6_PREFER_SRC_CGA: - case IPV6_PREFER_SRC_NONCGA: - case 0: - break; - default: - goto e_inval; - } - - np->srcprefs = (np->srcprefs & prefmask) | pref; - retv = 0; - + retv = __ip6_sock_set_addr_preferences(sk, val); break; - } case IPV6_MINHOPCOUNT: if (optlen < sizeof(int)) goto e_inval; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 01a6f5111a77..b7415ca75c2d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -473,6 +473,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else sk->sk_err_soft = err; goto out; + case TCP_LISTEN: + break; + default: + /* check if this ICMP message allows revert of backoff. + * (see RFC 6069) + */ + if (!fastopen && type == ICMPV6_DEST_UNREACH && + code == ICMPV6_NOROUTE) + tcp_ld_RTO_revert(sk, seq); } if (!sock_owned_by_user(sk) && np->recverr) { diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a52cb3fc6df5..04cbeefd8982 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -17,11 +17,6 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm6_extract_header(skb); -} - int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { @@ -35,9 +30,12 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); static int xfrm6_transport_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (xfrm_trans_queue(skb, ip6_rcv_finish)) - __kfree_skb(skb); - return -1; + if (xfrm_trans_queue(skb, ip6_rcv_finish)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return 0; } int xfrm6_transport_finish(struct sk_buff *skb, int async) @@ -60,13 +58,106 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); skb_reset_transport_header(skb); - return -1; + return 0; } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm6_transport_finish2); - return -1; + return 0; +} + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct ipv6hdr *ip6h; + int len; + int ip6hlen = sizeof(struct ipv6hdr); + + __u8 *udpdata; + __be32 *udpdata32; + __u16 encap_type = up->encap_type; + + /* if this is not encapsulated socket, then just return now */ + if (!encap_type) + return 1; + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + len = skb->len - sizeof(struct udphdr); + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = udp_hdr(skb); + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { + /* ESP Packet without Non-ESP header */ + len = sizeof(struct udphdr); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + } + + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + if (skb_unclone(skb, GFP_ATOMIC)) + goto drop; + + /* Now we can update and verify the packet length... */ + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); + if (skb->len < ip6hlen + len) { + /* packet is too small!?! */ + goto drop; + } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + __skb_pull(skb, len); + skb_reset_transport_header(skb); + + /* process ESP */ + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); + +drop: + kfree_skb(skb); + return 0; } int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e34167f790e6..8b84d534b19d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -23,24 +23,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, } EXPORT_SYMBOL(xfrm6_find_1stfragopt); -static int xfrm6_local_dontfrag(struct sk_buff *skb) -{ - int proto; - struct sock *sk = skb->sk; - - if (sk) { - if (sk->sk_family != AF_INET6) - return 0; - - proto = sk->sk_protocol; - if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; - } - - return 0; -} - -static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; @@ -64,80 +47,9 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu) ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } -static int xfrm6_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - struct dst_entry *dst = skb_dst(skb); - - if (skb->ignore_df) - goto out; - - mtu = dst_mtu(dst); - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - - if (xfrm6_local_dontfrag(skb)) - xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm6_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; - - return xfrm6_extract_header(skb); -} - -int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; - - return xfrm_output(sk, skb); -} - -static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk, - struct sk_buff *skb) -{ - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; -} - static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct xfrm_state *x = skb_dst(skb)->xfrm; - - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -164,7 +76,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb)) { + if (toobig && xfrm6_local_dontfrag(skb->sk)) { xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; @@ -179,7 +91,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) __xfrm6_output_finish); skip_frag: - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 34cb65c7d5a7..ea2f805d3b01 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -14,6 +14,7 @@ #include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> +#include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/xfrm.h> @@ -58,6 +59,53 @@ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + int ret; + struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr); + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + + if (!head) + goto out; + + if (!skb_dst(skb)) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, + skb, flags); + if (dst->error) + goto drop; + skb_dst_set(skb, dst); + } + + for_each_protocol_rcu(*head, handler) + if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) + return ret; + +out: + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_encap); + static int xfrm6_esp_rcv(struct sk_buff *skb) { int ret; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 78daadecbdef..6610b2198fa9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -13,37 +13,11 @@ */ #include <net/xfrm.h> -#include <linux/pfkeyv2.h> -#include <linux/ipsec.h> -#include <linux/netfilter_ipv6.h> -#include <linux/export.h> -#include <net/dsfield.h> -#include <net/ipv6.h> -#include <net/addrconf.h> - -int xfrm6_extract_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = 0; - XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); - XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); - XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->optlen = 0; - memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .output = xfrm6_output, - .output_finish = xfrm6_output_finish, - .extract_input = xfrm6_extract_input, - .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, .local_error = xfrm6_local_error, }; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 0e9aa94adc07..838cdfc10e47 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -172,17 +172,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) -{ - struct mpls_shim_hdr result; - result.label_stack_entry = - cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) | - (tc << MPLS_LS_TC_SHIFT) | - (bos ? (1 << MPLS_LS_S_SHIFT) : 0) | - (ttl << MPLS_LS_TTL_SHIFT)); - return result; -} - static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) { struct mpls_entry_decoded result; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 46782fac4c16..43db0eca911f 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = { { } }; -/* doing it this way avoids calling tcp_sk() */ -void rds_tcp_nonagle(struct socket *sock) -{ - int val = 1; - - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, - sizeof(val)); -} - u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { /* seq# of the last byte of data in tcp send buffer */ @@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock) struct net *net = sock_net(sk); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); lock_sock(sk); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 3c69361d21c7..bad9cf49d565 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,7 +50,6 @@ struct rds_tcp_statistics { /* tcp.c */ void rds_tcp_tune(struct socket *sock); -void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, @@ -71,9 +70,8 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); -int rds_tcp_keepalive(struct socket *sock); +void rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); -void rds_tcp_set_linger(struct socket *sock); /* tcp_recv.c */ int rds_tcp_recv_init(void); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 008f50fb25dd..4e64598176b0 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -207,7 +207,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) if (sock) { if (rds_destroy_pending(cp->cp_conn)) - rds_tcp_set_linger(sock); + sock_no_linger(sock->sk); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 810a3a49e947..101cf14215a0 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -38,36 +38,19 @@ #include "rds.h" #include "tcp.h" -int rds_tcp_keepalive(struct socket *sock) +void rds_tcp_keepalive(struct socket *sock) { /* values below based on xs_udp_default_timeout */ int keepidle = 5; /* send a probe 'keepidle' secs after last data */ int keepcnt = 5; /* number of unack'ed probes before declaring dead */ - int keepalive = 1; - int ret = 0; - - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&keepalive, sizeof(keepalive)); - if (ret < 0) - goto bail; - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); - if (ret < 0) - goto bail; - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - if (ret < 0) - goto bail; + sock_set_keepalive(sock->sk); + tcp_sock_set_keepcnt(sock->sk, keepcnt); + tcp_sock_set_keepidle(sock->sk, keepidle); /* KEEPINTVL is the interval between successive probes. We follow * the model in xs_tcp_finish_connecting() and re-use keepidle. */ - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); -bail: - return ret; + tcp_sock_set_keepintvl(sock->sk, keepidle); } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the @@ -111,17 +94,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) return NULL; } -void rds_tcp_set_linger(struct socket *sock) -{ - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; - - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); -} - int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; @@ -160,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->ops = sock->ops; __module_get(new_sock->ops->owner); - ret = rds_tcp_keepalive(new_sock); - if (ret < 0) - goto out; - + rds_tcp_keepalive(new_sock); rds_tcp_tune(new_sock); inet = inet_sk(new_sock->sk); @@ -241,7 +210,7 @@ rst_nsk: * be pending on it. By setting linger, we achieve the side-effect * of avoiding TIME_WAIT state on new_sock. */ - rds_tcp_set_linger(new_sock); + sock_no_linger(new_sock->sk); kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: @@ -303,7 +272,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) } sock->sk->sk_reuse = SK_CAN_REUSE; - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = sock->sk->sk_data_ready; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 78a2554a4497..8c4d1d6e9249 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -38,23 +38,18 @@ #include "rds.h" #include "tcp.h" -static void rds_tcp_cork(struct socket *sock, int val) -{ - kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val)); -} - void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) { struct rds_tcp_connection *tc = cp->cp_transport_data; - rds_tcp_cork(tc->t_sock, 1); + tcp_sock_set_cork(tc->t_sock->sk, true); } void rds_tcp_xmit_path_complete(struct rds_conn_path *cp) { struct rds_tcp_connection *tc = cp->cp_transport_data; - rds_tcp_cork(tc->t_sock, 0); + tcp_sock_set_cork(tc->t_sock->sk, false); } /* the core send_sem serializes this with other xmit and shutdown */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 15ee92d79581..394189b81849 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -571,6 +571,19 @@ out: return ret; } +int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val) +{ + if (sk->sk_state != RXRPC_UNBOUND) + return -EISCONN; + if (val > RXRPC_SECURITY_MAX) + return -EINVAL; + lock_sock(sk); + rxrpc_sk(sk)->min_sec_level = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(rxrpc_sock_set_min_security_level); + /* * set RxRPC socket options */ diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 01135e54d95d..c8b2097f499c 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { struct sock *usk; - int ret, opt; + int ret; _enter("%p{%d,%d}", local, local->srx.transport_type, local->srx.transport.family); @@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) switch (local->srx.transport.family) { case AF_INET6: /* we want to receive ICMPv6 errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip6_sock_set_recverr(local->socket->sk); /* Fall through and set IPv4 options too otherwise we don't get * errors from IPv4 packets sent through the IPv6 socket. @@ -171,31 +165,13 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) /* Fall through */ case AF_INET: /* we want to receive ICMP errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip_sock_set_recverr(local->socket->sk); /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO); /* We want receive timestamps. */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + sock_enable_timestamps(local->socket->sk); break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f8b632a5c619..1ba43c3df4ad 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -321,7 +321,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, struct kvec iov[2]; rxrpc_serial_t serial; size_t len; - int ret, opt; + int ret; _enter(",{%d}", skb->len); @@ -473,18 +473,14 @@ send_fragmentable: switch (conn->params.local->srx.transport.family) { case AF_INET6: case AF_INET: - opt = IP_PMTUDISC_DONT; - kernel_setsockopt(conn->params.local->socket, - SOL_IP, IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DONT); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); conn->params.peer->last_tx_at = ktime_get_seconds(); - opt = IP_PMTUDISC_DO; - kernel_setsockopt(conn->params.local->socket, - SOL_IP, IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DO); break; default: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 827a9903ee28..d57e1a002ffc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -972,23 +972,22 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * it. * * sk The sk of the socket - * addrs The pointer to the addresses in user land + * addrs The pointer to the addresses * addrssize Size of the addrs buffer * op Operation to perform (add or remove, see the flags of * sctp_bindx) * * Returns 0 if ok, <0 errno code on error. */ -static int sctp_setsockopt_bindx(struct sock *sk, - struct sockaddr __user *addrs, - int addrs_size, int op) +static int sctp_setsockopt_bindx_kernel(struct sock *sk, + struct sockaddr *addrs, int addrs_size, + int op) { - struct sockaddr *kaddrs; int err; int addrcnt = 0; int walk_size = 0; struct sockaddr *sa_addr; - void *addr_buf; + void *addr_buf = addrs; struct sctp_af *af; pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", @@ -997,17 +996,10 @@ static int sctp_setsockopt_bindx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = memdup_user(addrs, addrs_size); - if (IS_ERR(kaddrs)) - return PTR_ERR(kaddrs); - /* Walk through the addrs buffer and count the number of addresses. */ - addr_buf = kaddrs; while (walk_size < addrs_size) { - if (walk_size + sizeof(sa_family_t) > addrs_size) { - kfree(kaddrs); + if (walk_size + sizeof(sa_family_t) > addrs_size) return -EINVAL; - } sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); @@ -1015,10 +1007,8 @@ static int sctp_setsockopt_bindx(struct sock *sk, /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. */ - if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - kfree(kaddrs); + if (!af || (walk_size + af->sockaddr_len) > addrs_size) return -EINVAL; - } addrcnt++; addr_buf += af->sockaddr_len; walk_size += af->sockaddr_len; @@ -1029,31 +1019,48 @@ static int sctp_setsockopt_bindx(struct sock *sk, case SCTP_BINDX_ADD_ADDR: /* Allow security module to validate bindx addresses. */ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD, - (struct sockaddr *)kaddrs, - addrs_size); + addrs, addrs_size); if (err) - goto out; - err = sctp_bindx_add(sk, kaddrs, addrcnt); + return err; + err = sctp_bindx_add(sk, addrs, addrcnt); if (err) - goto out; - err = sctp_send_asconf_add_ip(sk, kaddrs, addrcnt); - break; - + return err; + return sctp_send_asconf_add_ip(sk, addrs, addrcnt); case SCTP_BINDX_REM_ADDR: - err = sctp_bindx_rem(sk, kaddrs, addrcnt); + err = sctp_bindx_rem(sk, addrs, addrcnt); if (err) - goto out; - err = sctp_send_asconf_del_ip(sk, kaddrs, addrcnt); - break; + return err; + return sctp_send_asconf_del_ip(sk, addrs, addrcnt); default: - err = -EINVAL; - break; + return -EINVAL; } +} -out: +static int sctp_setsockopt_bindx(struct sock *sk, + struct sockaddr __user *addrs, + int addrs_size, int op) +{ + struct sockaddr *kaddrs; + int err; + + kaddrs = memdup_user(addrs, addrs_size); + if (IS_ERR(kaddrs)) + return PTR_ERR(kaddrs); + err = sctp_setsockopt_bindx_kernel(sk, kaddrs, addrs_size, op); kfree(kaddrs); + return err; +} +static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs, + int addrlen) +{ + int err; + + lock_sock(sk); + err = sctp_setsockopt_bindx_kernel(sk, addrs, addrlen, + SCTP_BINDX_ADD_ADDR); + release_sock(sk); return err; } @@ -9625,6 +9632,7 @@ struct proto sctp_prot = { .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, + .bind_add = sctp_bind_add, .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, @@ -9667,6 +9675,7 @@ struct proto sctpv6_prot = { .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, + .bind_add = sctp_bind_add, .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index b2b85e1be72c..a47e8855e045 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -116,19 +116,15 @@ int smc_cdc_msg_send(struct smc_connection *conn, } /* send a validation msg indicating the move of a conn to an other QP link */ -int smcr_cdc_msg_send_validation(struct smc_connection *conn) +int smcr_cdc_msg_send_validation(struct smc_connection *conn, + struct smc_cdc_tx_pend *pend, + struct smc_wr_buf *wr_buf) { struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; struct smc_link *link = conn->lnk; - struct smc_cdc_tx_pend *pend; - struct smc_wr_buf *wr_buf; struct smc_cdc_msg *peer; int rc; - rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); - if (rc) - return rc; - peer = (struct smc_cdc_msg *)wr_buf; peer->common.type = local->common.type; peer->len = local->len; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 2ddcc5fb5ceb..0a0a89abd38b 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -296,7 +296,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); int smcd_cdc_msg_send(struct smc_connection *conn); -int smcr_cdc_msg_send_validation(struct smc_connection *conn); +int smcr_cdc_msg_send_validation(struct smc_connection *conn, + struct smc_cdc_tx_pend *pend, + struct smc_wr_buf *wr_buf); int smc_cdc_init(void) __init; void smcd_cdc_rx_init(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 65de700e1f17..7964a21e5e6f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -483,7 +483,8 @@ static int smc_write_space(struct smc_connection *conn) return space; } -static int smc_switch_cursor(struct smc_sock *smc) +static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, + struct smc_wr_buf *wr_buf) { struct smc_connection *conn = &smc->conn; union smc_host_cursor cons, fin; @@ -520,11 +521,14 @@ static int smc_switch_cursor(struct smc_sock *smc) if (smc->sk.sk_state != SMC_INIT && smc->sk.sk_state != SMC_CLOSED) { - rc = smcr_cdc_msg_send_validation(conn); + rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); if (!rc) { schedule_delayed_work(&conn->tx_work, 0); smc->sk.sk_data_ready(&smc->sk); } + } else { + smc_wr_tx_put_slot(conn->lnk, + (struct smc_wr_tx_pend_priv *)pend); } return rc; } @@ -533,7 +537,9 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err) { struct smc_link *to_lnk = NULL; + struct smc_cdc_tx_pend *pend; struct smc_connection *conn; + struct smc_wr_buf *wr_buf; struct smc_sock *smc; struct rb_node *node; int i, rc = 0; @@ -582,10 +588,16 @@ again: } sock_hold(&smc->sk); read_unlock_bh(&lgr->conns_lock); + /* pre-fetch buffer outside of send_lock, might sleep */ + rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend); + if (rc) { + smcr_link_down_cond_sched(to_lnk); + return NULL; + } /* avoid race with smcr_tx_sndbuf_nonempty() */ spin_lock_bh(&conn->send_lock); conn->lnk = to_lnk; - rc = smc_switch_cursor(smc); + rc = smc_switch_cursor(smc, pend, wr_buf); spin_unlock_bh(&conn->send_lock); sock_put(&smc->sk); if (rc) { diff --git a/net/socket.c b/net/socket.c index 80422fc3c836..976426d03f09 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3625,71 +3625,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr) EXPORT_SYMBOL(kernel_getpeername); /** - * kernel_getsockopt - get a socket option (kernel space) - * @sock: socket - * @level: API level (SOL_SOCKET, ...) - * @optname: option tag - * @optval: option value - * @optlen: option length - * - * Assigns the option length to @optlen. - * Returns 0 or an error. - */ - -int kernel_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) -{ - mm_segment_t oldfs = get_fs(); - char __user *uoptval; - int __user *uoptlen; - int err; - - uoptval = (char __user __force *) optval; - uoptlen = (int __user __force *) optlen; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); - else - err = sock->ops->getsockopt(sock, level, optname, uoptval, - uoptlen); - set_fs(oldfs); - return err; -} -EXPORT_SYMBOL(kernel_getsockopt); - -/** - * kernel_setsockopt - set a socket option (kernel space) - * @sock: socket - * @level: API level (SOL_SOCKET, ...) - * @optname: option tag - * @optval: option value - * @optlen: option length - * - * Returns 0 or an error. - */ - -int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, unsigned int optlen) -{ - mm_segment_t oldfs = get_fs(); - char __user *uoptval; - int err; - - uoptval = (char __user __force *) optval; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, uoptval, optlen); - else - err = sock->ops->setsockopt(sock, level, optname, uoptval, - optlen); - set_fs(oldfs); - return err; -} -EXPORT_SYMBOL(kernel_setsockopt); - -/** * kernel_sendpage - send a &page through a socket (kernel space) * @sock: socket * @page: page diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 023514e392b3..e7a0037d9b56 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -323,17 +323,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) { - struct svc_sock *svsk; - struct socket *sock; - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - svsk = container_of(xprt, struct svc_sock, sk_xprt); - sock = svsk->sk_sock; - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); + sock_no_linger(svsk->sk_sock->sk); } /* @@ -603,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = { static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { - int err, level, optname, one = 1; - svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); @@ -624,19 +614,14 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) /* make sure we get destination address info */ switch (svsk->sk_sk->sk_family) { case AF_INET: - level = SOL_IP; - optname = IP_PKTINFO; + ip_sock_set_pktinfo(svsk->sk_sock->sk); break; case AF_INET6: - level = SOL_IPV6; - optname = IPV6_RECVPKTINFO; + ip6_sock_set_recvpktinfo(svsk->sk_sock->sk); break; default: BUG(); } - err = kernel_setsockopt(svsk->sk_sock, level, optname, - (char *)&one, sizeof(one)); - dprintk("svc: kernel_setsockopt returned %d\n", err); } /* @@ -1337,7 +1322,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; int family; - int val; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("svc: svc_create_socket(%s, %d, %s)\n", @@ -1373,11 +1357,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, * getting requests from IPv4 remotes. Those should * be shunted to a PF_INET listener via rpcbind. */ - val = 1; if (family == PF_INET6) - kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, - (char *)&val, sizeof(val)); - + ip6_sock_set_v6only(sock->sk); if (type == SOCK_STREAM) sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */ error = kernel_bind(sock, sin, len); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 845d0be805ec..3a143e250b9a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1594,21 +1594,6 @@ static int xs_get_random_port(void) return rand + min; } -/** - * xs_set_reuseaddr_port - set the socket's port and address reuse options - * @sock: socket - * - * Note that this function has to be called on all sockets that share the - * same port, and it must be called before binding. - */ -static void xs_sock_set_reuseport(struct socket *sock) -{ - int opt = 1; - - kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, - (char *)&opt, sizeof(opt)); -} - static unsigned short xs_sock_getport(struct socket *sock) { struct sockaddr_storage buf; @@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, xs_reclassify_socket(family, sock); if (reuseport) - xs_sock_set_reuseport(sock); + sock_set_reuseport(sock->sk); err = xs_bind(transport, sock); if (err) { @@ -2110,7 +2095,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); unsigned int keepidle; unsigned int keepcnt; - unsigned int opt_on = 1; unsigned int timeo; spin_lock(&xprt->transport_lock); @@ -2122,18 +2106,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, spin_unlock(&xprt->transport_lock); /* TCP Keepalive options */ - kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&opt_on, sizeof(opt_on)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); + sock_set_keepalive(sock->sk); + tcp_sock_set_keepidle(sock->sk, keepidle); + tcp_sock_set_keepintvl(sock->sk, keepidle); + tcp_sock_set_keepcnt(sock->sk, keepcnt); /* TCP user timeout (see RFC5482) */ - kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&timeo, sizeof(timeo)); + tcp_sock_set_user_timeout(sock->sk, timeo); } static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, @@ -2171,7 +2150,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; - unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC; /* Avoid temporary address, they are bad for long-lived * connections such as NFS mounts. @@ -2180,8 +2158,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) * knowledge about the normal duration of connections, * MAY override this as appropriate. */ - kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES, - (char *)&addr_pref, sizeof(addr_pref)); + if (xs_addr(xprt)->sa_family == PF_INET6) { + ip6_sock_set_addr_preferences(sk, + IPV6_PREFER_SRC_PUBLIC); + } xs_tcp_set_socket_timeouts(xprt, sock); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 23809039dda1..c0afcd627c5e 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -212,7 +212,7 @@ err: int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, int mss, struct sk_buff_head *txq) { - struct sk_buff *skb, *prev; + struct sk_buff *skb; int accounted, total, curr; int mlen, cpy, rem = dlen; struct tipc_msg *hdr; @@ -223,7 +223,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, while (rem) { if (!skb || skb->len >= mss) { - prev = skb; skb = tipc_buf_acquire(mss, GFP_KERNEL); if (unlikely(!skb)) return -ENOMEM; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d6b67d07d22e..3734cdbedc9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -196,17 +196,17 @@ static int tsk_importance(struct tipc_sock *tsk) return msg_importance(&tsk->phdr); } -static int tsk_set_importance(struct tipc_sock *tsk, int imp) +static struct tipc_sock *tipc_sk(const struct sock *sk) { - if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&tsk->phdr, (u32)imp); - return 0; + return container_of(sk, struct tipc_sock, sk); } -static struct tipc_sock *tipc_sk(const struct sock *sk) +int tsk_set_importance(struct sock *sk, int imp) { - return container_of(sk, struct tipc_sock, sk); + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp); + return 0; } static bool tsk_conn_cong(struct tipc_sock *tsk) @@ -2721,7 +2721,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, /* Connect new socket to it's peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); - tsk_set_importance(new_tsock, msg_importance(msg)); + tsk_set_importance(new_sk, msg_importance(msg)); if (msg_named(msg)) { new_tsock->conn_type = msg_nametype(msg); new_tsock->conn_instance = msg_nameinst(msg); @@ -3139,7 +3139,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, switch (opt) { case TIPC_IMPORTANCE: - res = tsk_set_importance(tsk, value); + res = tsk_set_importance(sk, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 235b9679acee..b11575afc66f 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk); bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb); bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb); +int tsk_set_importance(struct sock *sk, int imp); + #endif diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 446af7bbd13e..1489cfb941d8 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -497,7 +497,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk) static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) { - int imp = TIPC_CRITICAL_IMPORTANCE; struct socket *lsock = NULL; struct sockaddr_tipc saddr; struct sock *sk; @@ -514,8 +513,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) sk->sk_user_data = srv; write_unlock_bh(&sk->sk_callback_lock); - rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE, - (char *)&imp, sizeof(imp)); + lock_sock(sk); + rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE); + release_sock(sk); if (rc < 0) goto err; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 6921a18201a0..b7fd9c838416 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -99,4 +99,7 @@ config NET_KEY_MIGRATE If unsure, say N. +config XFRM_ESPINTCP + bool + endif # INET diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 212a4fcb4a88..2d4bb4b9f75e 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o -obj-$(CONFIG_INET_ESPINTCP) += espintcp.o +obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 037ea156d2f9..2132a3b6df0f 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -6,6 +6,9 @@ #include <net/espintcp.h> #include <linux/skmsg.h> #include <net/inet_common.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6_stubs.h> +#endif static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, struct sock *sk) @@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); local_bh_disable(); - xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + else +#endif + xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); local_bh_enable(); rcu_read_unlock(); } @@ -347,6 +355,9 @@ unlock: static struct proto espintcp_prot __ro_after_init; static struct proto_ops espintcp_ops __ro_after_init; +static struct proto espintcp6_prot; +static struct proto_ops espintcp6_ops; +static DEFINE_MUTEX(tcpv6_prot_mutex); static void espintcp_data_ready(struct sock *sk) { @@ -384,10 +395,14 @@ static void espintcp_destruct(struct sock *sk) bool tcp_is_ulp_esp(struct sock *sk) { - return sk->sk_prot == &espintcp_prot; + return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot; } EXPORT_SYMBOL_GPL(tcp_is_ulp_esp); +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops); static int espintcp_init_sk(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -415,8 +430,19 @@ static int espintcp_init_sk(struct sock *sk) strp_check_rcv(&ctx->strp); skb_queue_head_init(&ctx->ike_queue); skb_queue_head_init(&ctx->out_queue); - sk->sk_prot = &espintcp_prot; - sk->sk_socket->ops = &espintcp_ops; + + if (sk->sk_family == AF_INET) { + sk->sk_prot = &espintcp_prot; + sk->sk_socket->ops = &espintcp_ops; + } else { + mutex_lock(&tcpv6_prot_mutex); + if (!espintcp6_prot.recvmsg) + build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops); + mutex_unlock(&tcpv6_prot_mutex); + + sk->sk_prot = &espintcp6_prot; + sk->sk_socket->ops = &espintcp6_ops; + } ctx->saved_data_ready = sk->sk_data_ready; ctx->saved_write_space = sk->sk_write_space; sk->sk_data_ready = espintcp_data_ready; @@ -489,6 +515,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock, return mask; } +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops) +{ + memcpy(espintcp_prot, orig_prot, sizeof(struct proto)); + memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops)); + espintcp_prot->sendmsg = espintcp_sendmsg; + espintcp_prot->recvmsg = espintcp_recvmsg; + espintcp_prot->close = espintcp_close; + espintcp_prot->release_cb = espintcp_release; + espintcp_ops->poll = espintcp_poll; +} + static struct tcp_ulp_ops espintcp_ulp __read_mostly = { .name = "espintcp", .owner = THIS_MODULE, @@ -497,13 +537,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = { void __init espintcp_init(void) { - memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot)); - memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops)); - espintcp_prot.sendmsg = espintcp_sendmsg; - espintcp_prot.recvmsg = espintcp_recvmsg; - espintcp_prot.close = espintcp_close; - espintcp_prot.release_cb = espintcp_release; - espintcp_ops.poll = espintcp_poll; + build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops); tcp_register_ulp(&espintcp_ulp); } diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h index c7b0318938e2..efc5e6b2e87b 100644 --- a/net/xfrm/xfrm_inout.h +++ b/net/xfrm/xfrm_inout.h @@ -6,6 +6,38 @@ #ifndef XFRM_INOUT_H #define XFRM_INOUT_H 1 +static inline void xfrm4_extract_header(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = iph->id; + XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; + XFRM_MODE_SKB_CB(skb)->tos = iph->tos; + XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); + memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +} + +static inline void xfrm6_extract_header(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +#else + WARN_ON_ONCE(1); +#endif +} + static inline void xfrm6_beet_make_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index aa35f23c4912..6db266a0cb2d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { const struct xfrm_mode *inner_mode = &x->inner_mode; - const struct xfrm_state_afinfo *afinfo; - int err = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - err = afinfo->extract_input(x, skb); - rcu_read_unlock(); - if (err) - return err; + switch (x->outer_mode.family) { + case AF_INET: + xfrm4_extract_header(skb); + break; + case AF_INET6: + xfrm6_extract_header(skb); + break; + default: + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; + } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 3361e3ac5714..02f8f46d0cc5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - dev_hold(dev); xfrmi_link(xfrmn, xi); return 0; @@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); xfrmi_unlink(xfrmn, xi); - dev_put(dev); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) @@ -757,6 +755,7 @@ static struct pernet_operations xfrmi_net_ops = { static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -764,6 +763,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -771,6 +771,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2fd3d990d992..9c43b8dd80fb 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,9 +13,15 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/icmp.h> #include <net/inet_ecn.h> #include <net/xfrm.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_route.h> +#include <net/ipv6_stubs.h> +#endif + #include "xfrm_inout.h" static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -565,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + switch (x->outer_mode.family) { + case AF_INET: + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); +#ifdef CONFIG_NETFILTER + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; +#endif + break; + case AF_INET6: + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif + break; + } + secpath_reset(skb); if (xfrm_dev_offload_ok(skb, x)) { @@ -609,11 +631,101 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); +static int xfrm4_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + + if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) + goto out; + + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) + goto out; + + mtu = dst_mtu(skb_dst(skb)); + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { + skb->protocol = htons(ETH_P_IP); + + if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + ret = -EMSGSIZE; + } +out: + return ret; +} + +static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm4_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; + + xfrm4_extract_header(skb); + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + struct dst_entry *dst = skb_dst(skb); + + if (skb->ignore_df) + goto out; + + mtu = dst_mtu(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + if (xfrm6_local_dontfrag(skb->sk)) + ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ret = -EMSGSIZE; + } +out: + return ret; +} +#endif + +static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm6_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; + + xfrm6_extract_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; - int err = -EAFNOSUPPORT; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, @@ -624,13 +736,14 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (inner_mode == NULL) return -EAFNOSUPPORT; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - err = afinfo->extract_output(x, skb); - rcu_read_unlock(); + switch (inner_mode->family) { + case AF_INET: + return xfrm4_extract_output(x, skb); + case AF_INET6: + return xfrm6_extract_output(x, skb); + } - return err; + return -EAFNOSUPPORT; } void xfrm_local_error(struct sk_buff *skb, int mtu) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 1e2f61262e4e..dee567f7576a 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -19,8 +19,8 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal" -IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal" +IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture" +IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture" ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}" TESTS="${ALL_TESTS}" @@ -767,6 +767,62 @@ ipv6_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv6_del_add_loop1() +{ + while :; do + $IP nexthop del id 100 + $IP nexthop add id 100 via 2001:db8:91::2 dev veth1 + done >/dev/null 2>&1 +} + +ipv6_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 + done >/dev/null 2>&1 +} + +ipv6_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv6 runtime torture" + echo "--------------------" + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101" + run_cmd "$IP route add 2001:db8:101::1 nhid 102" + run_cmd "$IP route add 2001:db8:101::2 nhid 102" + + ipv6_del_add_loop1 & + pid1=$! + ipv6_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + + # if we did not crash, success + log_test 0 0 "IPv6 torture test" +} + + ipv4_fcnal() { local rc @@ -1313,6 +1369,61 @@ ipv4_compat_mode() sysctl_nexthop_compat_mode_set 1 "IPv4" } +ipv4_del_add_loop1() +{ + while :; do + $IP nexthop del id 100 + $IP nexthop add id 100 via 172.16.1.2 dev veth1 + done >/dev/null 2>&1 +} + +ipv4_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 + done >/dev/null 2>&1 +} + +ipv4_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv4 runtime torture" + echo "--------------------" + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101" + run_cmd "$IP route add 172.16.101.1 nhid 102" + run_cmd "$IP route add 172.16.101.2 nhid 102" + + ipv4_del_add_loop1 & + pid1=$! + ipv4_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + + # if we did not crash, success + log_test 0 0 "IPv4 torture test" +} + basic() { echo |