diff options
Diffstat (limited to 'net/dsa')
-rw-r--r-- | net/dsa/Kconfig | 92 | ||||
-rw-r--r-- | net/dsa/Makefile | 20 | ||||
-rw-r--r-- | net/dsa/dsa.c | 239 | ||||
-rw-r--r-- | net/dsa/dsa2.c | 1178 | ||||
-rw-r--r-- | net/dsa/dsa_priv.h | 487 | ||||
-rw-r--r-- | net/dsa/master.c | 220 | ||||
-rw-r--r-- | net/dsa/netlink.c | 63 | ||||
-rw-r--r-- | net/dsa/port.c | 1732 | ||||
-rw-r--r-- | net/dsa/slave.c | 2641 | ||||
-rw-r--r-- | net/dsa/switch.c | 949 | ||||
-rw-r--r-- | net/dsa/tag_8021q.c | 598 | ||||
-rw-r--r-- | net/dsa/tag_ar9331.c | 8 | ||||
-rw-r--r-- | net/dsa/tag_brcm.c | 147 | ||||
-rw-r--r-- | net/dsa/tag_dsa.c | 403 | ||||
-rw-r--r-- | net/dsa/tag_edsa.c | 184 | ||||
-rw-r--r-- | net/dsa/tag_gswip.c | 10 | ||||
-rw-r--r-- | net/dsa/tag_hellcreek.c | 70 | ||||
-rw-r--r-- | net/dsa/tag_ksz.c | 153 | ||||
-rw-r--r-- | net/dsa/tag_lan9303.c | 59 | ||||
-rw-r--r-- | net/dsa/tag_mtk.c | 57 | ||||
-rw-r--r-- | net/dsa/tag_ocelot.c | 311 | ||||
-rw-r--r-- | net/dsa/tag_ocelot_8021q.c | 135 | ||||
-rw-r--r-- | net/dsa/tag_qca.c | 108 | ||||
-rw-r--r-- | net/dsa/tag_rtl4_a.c | 124 | ||||
-rw-r--r-- | net/dsa/tag_rtl8_4.c | 258 | ||||
-rw-r--r-- | net/dsa/tag_rzn1_a5psw.c | 113 | ||||
-rw-r--r-- | net/dsa/tag_sja1105.c | 663 | ||||
-rw-r--r-- | net/dsa/tag_trailer.c | 36 | ||||
-rw-r--r-- | net/dsa/tag_xrs700x.c | 64 |
29 files changed, 8993 insertions, 2129 deletions
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 92663dcb3aa2..3eef72ce99a4 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,33 +1,22 @@ # SPDX-License-Identifier: GPL-2.0-only -config HAVE_NET_DSA - def_bool y - depends on INET && NETDEVICES && !S390 - -# Drivers must select NET_DSA and the appropriate tagging format menuconfig NET_DSA tristate "Distributed Switch Architecture" - depends on HAVE_NET_DSA depends on BRIDGE || BRIDGE=n + depends on HSR || HSR=n + depends on INET && NETDEVICES + select GRO_CELLS select NET_SWITCHDEV select PHYLINK select NET_DEVLINK - ---help--- + imply NET_SELFTESTS + help Say Y if you want to enable support for the hardware switches supported by the Distributed Switch Architecture. if NET_DSA -# tagging formats -config NET_DSA_TAG_8021Q - tristate - select VLAN_8021Q - help - Unlike the other tagging protocols, the 802.1Q config option simply - provides helpers for other tagging implementations that might rely on - VLAN in one way or another. It is not a complete solution. - - Drivers which use these helpers should select this as dependency. +# Drivers must select the appropriate tagging format(s) config NET_DSA_TAG_AR9331 tristate "Tag driver for Atheros AR9331 SoC with built-in switch" @@ -46,6 +35,13 @@ config NET_DSA_TAG_BRCM Say Y if you want to enable support for tagging frames for the Broadcom switches which place the tag after the MAC source address. +config NET_DSA_TAG_BRCM_LEGACY + tristate "Tag driver for Broadcom legacy switches using in-frame headers" + select NET_DSA_TAG_BRCM_COMMON + help + Say Y if you want to enable support for tagging frames for the + Broadcom legacy switches which place the tag after the MAC source + address. config NET_DSA_TAG_BRCM_PREPEND tristate "Tag driver for Broadcom switches using prepended headers" @@ -55,20 +51,31 @@ config NET_DSA_TAG_BRCM_PREPEND Broadcom switches which places the tag before the Ethernet header (prepended). +config NET_DSA_TAG_HELLCREEK + tristate "Tag driver for Hirschmann Hellcreek TSN switches" + help + Say Y or M if you want to enable support for tagging frames + for the Hirschmann Hellcreek TSN switches. + config NET_DSA_TAG_GSWIP tristate "Tag driver for Lantiq / Intel GSWIP switches" help Say Y or M if you want to enable support for tagging frames for the Lantiq / Intel GSWIP switches. +config NET_DSA_TAG_DSA_COMMON + tristate + config NET_DSA_TAG_DSA tristate "Tag driver for Marvell switches using DSA headers" + select NET_DSA_TAG_DSA_COMMON help Say Y or M if you want to enable support for tagging frames for the Marvell switches which use DSA headers. config NET_DSA_TAG_EDSA tristate "Tag driver for Marvell switches using EtherType DSA headers" + select NET_DSA_TAG_DSA_COMMON help Say Y or M if you want to enable support for tagging frames for the Marvell switches which use EtherType DSA headers. @@ -80,17 +87,31 @@ config NET_DSA_TAG_MTK Mediatek switches. config NET_DSA_TAG_KSZ - tristate "Tag driver for Microchip 8795/9477/9893 families of switches" + tristate "Tag driver for Microchip 8795/937x/9477/9893 families of switches" help Say Y if you want to enable support for tagging frames for the - Microchip 8795/9477/9893 families of switches. + Microchip 8795/937x/9477/9893 families of switches. config NET_DSA_TAG_OCELOT - tristate "Tag driver for Ocelot family of switches" + tristate "Tag driver for Ocelot family of switches, using NPI port" select PACKING help - Say Y or M if you want to enable support for tagging frames for the - Ocelot switches (VSC7511, VSC7512, VSC7513, VSC7514, VSC9959). + Say Y or M if you want to enable NPI tagging for the Ocelot switches + (VSC7511, VSC7512, VSC7513, VSC7514, VSC9953, VSC9959). In this mode, + the frames over the Ethernet CPU port are prepended with a + hardware-defined injection/extraction frame header. Flow control + (PAUSE frames) over the CPU port is not supported when operating in + this mode. + +config NET_DSA_TAG_OCELOT_8021Q + tristate "Tag driver for Ocelot family of switches, using VLAN" + help + Say Y or M if you want to enable support for tagging frames with a + custom VLAN-based header. Frames that require timestamping, such as + PTP, are not delivered over Ethernet but over register-based MMIO. + Flow control over the CPU port is functional in this mode. When using + this mode, less TCAM resources (VCAP IS1, IS2, ES0) are available for + use with tc-flower. config NET_DSA_TAG_QCA tristate "Tag driver for Qualcomm Atheros QCA8K switches" @@ -98,6 +119,26 @@ config NET_DSA_TAG_QCA Say Y or M if you want to enable support for tagging frames for the Qualcomm Atheros QCA8K switches. +config NET_DSA_TAG_RTL4_A + tristate "Tag driver for Realtek 4 byte protocol A tags" + help + Say Y or M if you want to enable support for tagging frames for the + Realtek switches with 4 byte protocol A tags, sich as found in + the Realtek RTL8366RB. + +config NET_DSA_TAG_RTL8_4 + tristate "Tag driver for Realtek 8 byte protocol 4 tags" + help + Say Y or M if you want to enable support for tagging frames for Realtek + switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC. + +config NET_DSA_TAG_RZN1_A5PSW + tristate "Tag driver for Renesas RZ/N1 A5PSW switch" + help + Say Y or M if you want to enable support for tagging frames for + Renesas RZ/N1 embedded switch that uses an 8 byte tag located after + destination MAC address. + config NET_DSA_TAG_LAN9303 tristate "Tag driver for SMSC/Microchip LAN9303 family of switches" help @@ -106,7 +147,6 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" - select NET_DSA_TAG_8021Q select PACKING help Say Y or M if you want to enable support for tagging frames with the @@ -120,4 +160,10 @@ config NET_DSA_TAG_TRAILER Say Y or M if you want to enable support for tagging frames at with a trailed. e.g. Marvell 88E6060. +config NET_DSA_TAG_XRS700X + tristate "Tag driver for XRS700x switches" + help + Say Y or M if you want to enable support for tagging frames for + Arrow SpeedChips XRS700x switches that use a single byte tag trailer. + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 108486cfdeef..bf57ef3bce2a 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,19 +1,31 @@ # SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o +dsa_core-y += \ + dsa.o \ + dsa2.o \ + master.o \ + netlink.o \ + port.o \ + slave.o \ + switch.o \ + tag_8021q.o # tagging formats -obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o -obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o -obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o +obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o +obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o +obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 17281fec710c..64b14f655b23 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -7,19 +7,10 @@ #include <linux/device.h> #include <linux/list.h> -#include <linux/platform_device.h> -#include <linux/slab.h> #include <linux/module.h> -#include <linux/notifier.h> -#include <linux/of.h> -#include <linux/of_mdio.h> -#include <linux/of_platform.h> -#include <linux/of_net.h> #include <linux/netdevice.h> #include <linux/sysfs.h> -#include <linux/phy_fixed.h> #include <linux/ptp_classify.h> -#include <linux/etherdevice.h> #include "dsa_priv.h" @@ -84,17 +75,39 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) return ops->name; }; +/* Function takes a reference on the module owning the tagger, + * so dsa_tag_driver_put must be called afterwards. + */ +const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf) +{ + const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT); + struct dsa_tag_driver *dsa_tag_driver; + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + const struct dsa_device_ops *tmp = dsa_tag_driver->ops; + + if (!sysfs_streq(buf, tmp->name)) + continue; + + if (!try_module_get(dsa_tag_driver->owner)) + break; + + ops = tmp; + break; + } + mutex_unlock(&dsa_tag_drivers_lock); + + return ops; +} + const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol) { struct dsa_tag_driver *dsa_tag_driver; const struct dsa_device_ops *ops; - char module_name[128]; bool found = false; - snprintf(module_name, 127, "%s%d", DSA_TAG_DRIVER_ALIAS, - tag_protocol); - - request_module(module_name); + request_module("%s%d", DSA_TAG_DRIVER_ALIAS, tag_protocol); mutex_lock(&dsa_tag_drivers_lock); list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { @@ -205,7 +218,6 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, { struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; - struct pcpu_sw_netstats *s; struct dsa_slave_priv *p; if (unlikely(!cpu_dp)) { @@ -217,50 +229,64 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = cpu_dp->rcv(skb, dev, pt); + nskb = cpu_dp->rcv(skb, dev); if (!nskb) { kfree_skb(skb); return 0; } skb = nskb; - p = netdev_priv(skb->dev); skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - s = this_cpu_ptr(p->stats64); - u64_stats_update_begin(&s->syncp); - s->rx_packets++; - s->rx_bytes += skb->len; - u64_stats_update_end(&s->syncp); + if (unlikely(!dsa_slave_dev_check(skb->dev))) { + /* Packet is to be injected directly on an upper + * device, e.g. a team/bond, so skip all DSA-port + * specific actions. + */ + netif_rx(skb); + return 0; + } + + p = netdev_priv(skb->dev); + + if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { + nskb = dsa_untag_bridge_pvid(skb); + if (!nskb) { + kfree_skb(skb); + return 0; + } + skb = nskb; + } + + dev_sw_netstats_rx_add(skb->dev, skb->len); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; - netif_receive_skb(skb); + gro_cells_receive(&p->gcells, skb); return 0; } #ifdef CONFIG_PM_SLEEP -static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +static bool dsa_port_is_initialized(const struct dsa_port *dp) { - const struct dsa_port *dp = dsa_to_port(ds, p); - return dp->type == DSA_PORT_TYPE_USER && dp->slave; } int dsa_switch_suspend(struct dsa_switch *ds) { - int i, ret = 0; + struct dsa_port *dp; + int ret = 0; /* Suspend slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave); + ret = dsa_slave_suspend(dp->slave); if (ret) return ret; } @@ -274,7 +300,8 @@ EXPORT_SYMBOL_GPL(dsa_switch_suspend); int dsa_switch_resume(struct dsa_switch *ds) { - int i, ret = 0; + struct dsa_port *dp; + int ret = 0; if (ds->ops->resume) ret = ds->ops->resume(ds); @@ -283,11 +310,11 @@ int dsa_switch_resume(struct dsa_switch *ds) return ret; /* Resume slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_resume(dsa_to_port(ds, i)->slave); + ret = dsa_slave_resume(dp->slave); if (ret) return ret; } @@ -309,36 +336,16 @@ bool dsa_schedule_work(struct work_struct *work) return queue_work(dsa_owq, work); } -static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain); - -int register_dsa_notifier(struct notifier_block *nb) +void dsa_flush_workqueue(void) { - return atomic_notifier_chain_register(&dsa_notif_chain, nb); + flush_workqueue(dsa_owq); } -EXPORT_SYMBOL_GPL(register_dsa_notifier); - -int unregister_dsa_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&dsa_notif_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_dsa_notifier); - -int call_dsa_notifiers(unsigned long val, struct net_device *dev, - struct dsa_notifier_info *info) -{ - info->dev = dev; - return atomic_notifier_call_chain(&dsa_notif_chain, val, info); -} -EXPORT_SYMBOL_GPL(call_dsa_notifiers); +EXPORT_SYMBOL_GPL(dsa_flush_workqueue); int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { - struct dsa_devlink_priv *dl_priv; - struct dsa_switch *ds; - - dl_priv = devlink_priv(dl); - ds = dl_priv->ds; + struct dsa_switch *ds = dsa_devlink_to_ds(dl); if (!ds->ops->devlink_param_get) return -EOPNOTSUPP; @@ -350,11 +357,7 @@ EXPORT_SYMBOL_GPL(dsa_devlink_param_get); int dsa_devlink_param_set(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { - struct dsa_devlink_priv *dl_priv; - struct dsa_switch *ds; - - dl_priv = devlink_priv(dl); - ds = dl_priv->ds; + struct dsa_switch *ds = dsa_devlink_to_ds(dl); if (!ds->ops->devlink_param_set) return -EOPNOTSUPP; @@ -395,7 +398,7 @@ EXPORT_SYMBOL_GPL(dsa_devlink_resource_register); void dsa_devlink_resources_unregister(struct dsa_switch *ds) { - devlink_resources_unregister(ds->devlink, NULL); + devlink_resources_unregister(ds->devlink); } EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister); @@ -416,6 +419,105 @@ void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, } EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister); +struct devlink_region * +dsa_devlink_region_create(struct dsa_switch *ds, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, u64 region_size) +{ + return devlink_region_create(ds->devlink, ops, region_max_snapshots, + region_size); +} +EXPORT_SYMBOL_GPL(dsa_devlink_region_create); + +struct devlink_region * +dsa_devlink_port_region_create(struct dsa_switch *ds, + int port, + const struct devlink_port_region_ops *ops, + u32 region_max_snapshots, u64 region_size) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + + return devlink_port_region_create(&dp->devlink_port, ops, + region_max_snapshots, + region_size); +} +EXPORT_SYMBOL_GPL(dsa_devlink_port_region_create); + +void dsa_devlink_region_destroy(struct devlink_region *region) +{ + devlink_region_destroy(region); +} +EXPORT_SYMBOL_GPL(dsa_devlink_region_destroy); + +struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) +{ + if (!netdev || !dsa_slave_dev_check(netdev)) + return ERR_PTR(-ENODEV); + + return dsa_slave_to_port(netdev); +} +EXPORT_SYMBOL_GPL(dsa_port_from_netdev); + +bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b) +{ + if (a->type != b->type) + return false; + + switch (a->type) { + case DSA_DB_PORT: + return a->dp == b->dp; + case DSA_DB_LAG: + return a->lag.dev == b->lag.dev; + case DSA_DB_BRIDGE: + return a->bridge.num == b->bridge.num; + default: + WARN_ON(1); + return false; + } +} + +bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_mac_addr *a; + + lockdep_assert_held(&dp->addr_lists_lock); + + list_for_each_entry(a, &dp->fdbs, list) { + if (!ether_addr_equal(a->addr, addr) || a->vid != vid) + continue; + + if (a->db.type == db.type && !dsa_db_equal(&a->db, &db)) + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(dsa_fdb_present_in_other_db); + +bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_mac_addr *a; + + lockdep_assert_held(&dp->addr_lists_lock); + + list_for_each_entry(a, &dp->mdbs, list) { + if (!ether_addr_equal(a->addr, mdb->addr) || a->vid != mdb->vid) + continue; + + if (a->db.type == db.type && !dsa_db_equal(&a->db, &db)) + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db); + static int __init dsa_init_module(void) { int rc; @@ -434,8 +536,16 @@ static int __init dsa_init_module(void) dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops), THIS_MODULE); + rc = rtnl_link_register(&dsa_link_ops); + if (rc) + goto netlink_register_fail; + return 0; +netlink_register_fail: + dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); + dsa_slave_unregister_notifier(); + dev_remove_pack(&dsa_pack_type); register_notifier_fail: destroy_workqueue(dsa_owq); @@ -445,6 +555,7 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + rtnl_link_unregister(&dsa_link_ops); dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); dsa_slave_unregister_notifier(); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index e7c30b472034..e504a18fc125 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -13,16 +13,212 @@ #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/of.h> +#include <linux/of_mdio.h> #include <linux/of_net.h> #include <net/devlink.h> +#include <net/sch_generic.h> #include "dsa_priv.h" -static LIST_HEAD(dsa_tree_list); static DEFINE_MUTEX(dsa2_mutex); +LIST_HEAD(dsa_tree_list); + +/* Track the bridges with forwarding offload enabled */ +static unsigned long dsa_fwd_offloading_bridges; + +/** + * dsa_tree_notify - Execute code for all switches in a DSA switch tree. + * @dst: collection of struct dsa_switch devices to notify. + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Given a struct dsa_switch_tree, this can be used to run a function once for + * each member DSA switch. The other alternative of traversing the tree is only + * through its ports list, which does not uniquely list the switches. + */ +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v) +{ + struct raw_notifier_head *nh = &dst->nh; + int err; -static const struct devlink_ops dsa_devlink_ops = { -}; + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + +/** + * dsa_broadcast - Notify all DSA trees in the system. + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Can be used to notify the switching fabric of events such as cross-chip + * bridging between disjoint trees (such as islands of tagger-compatible + * switches bridged by an incompatible middle switch). + * + * WARNING: this function is not reliable during probe time, because probing + * between trees is asynchronous and not all DSA trees might have probed. + */ +int dsa_broadcast(unsigned long e, void *v) +{ + struct dsa_switch_tree *dst; + int err = 0; + + list_for_each_entry(dst, &dsa_tree_list, list) { + err = dsa_tree_notify(dst, e, v); + if (err) + break; + } + + return err; +} + +/** + * dsa_lag_map() - Map LAG structure to a linear LAG array + * @dst: Tree in which to record the mapping. + * @lag: LAG structure that is to be mapped to the tree's array. + * + * dsa_lag_id/dsa_lag_by_id can then be used to translate between the + * two spaces. The size of the mapping space is determined by the + * driver by setting ds->num_lag_ids. It is perfectly legal to leave + * it unset if it is not needed, in which case these functions become + * no-ops. + */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag) +{ + unsigned int id; + + for (id = 1; id <= dst->lags_len; id++) { + if (!dsa_lag_by_id(dst, id)) { + dst->lags[id - 1] = lag; + lag->id = id; + return; + } + } + + /* No IDs left, which is OK. Some drivers do not need it. The + * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id + * returns an error for this device when joining the LAG. The + * driver can then return -EOPNOTSUPP back to DSA, which will + * fall back to a software LAG. + */ +} + +/** + * dsa_lag_unmap() - Remove a LAG ID mapping + * @dst: Tree in which the mapping is recorded. + * @lag: LAG structure that was mapped. + * + * As there may be multiple users of the mapping, it is only removed + * if there are no other references to it. + */ +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag) +{ + unsigned int id; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_by_id(dst, id) == lag) { + dst->lags[id - 1] = NULL; + lag->id = 0; + break; + } + } +} + +struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, + const struct net_device *lag_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_lag_dev_get(dp) == lag_dev) + return dp->lag; + + return NULL; +} + +struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, + const struct net_device *br) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_bridge_dev_get(dp) == br) + return dp->bridge; + + return NULL; +} + +static int dsa_bridge_num_find(const struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst; + + list_for_each_entry(dst, &dsa_tree_list, list) { + struct dsa_bridge *bridge; + + bridge = dsa_tree_bridge_find(dst, bridge_dev); + if (bridge) + return bridge->num; + } + + return 0; +} + +unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) +{ + unsigned int bridge_num = dsa_bridge_num_find(bridge_dev); + + /* Switches without FDB isolation support don't get unique + * bridge numbering + */ + if (!max) + return 0; + + if (!bridge_num) { + /* First port that requests FDB isolation or TX forwarding + * offload for this bridge + */ + bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES, + 1); + if (bridge_num >= max) + return 0; + + set_bit(bridge_num, &dsa_fwd_offloading_bridges); + } + + return bridge_num; +} + +void dsa_bridge_num_put(const struct net_device *bridge_dev, + unsigned int bridge_num) +{ + /* Since we refcount bridges, we know that when we call this function + * it is no longer in use, so we can just go ahead and remove it from + * the bit mask. + */ + clear_bit(bridge_num, &dsa_fwd_offloading_bridges); +} + +struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) +{ + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + list_for_each_entry(dst, &dsa_tree_list, list) { + if (dst->index != tree_index) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->index != sw_index) + continue; + + return dp->ds; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_switch_find); static struct dsa_switch_tree *dsa_tree_find(int index) { @@ -59,6 +255,8 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) static void dsa_tree_free(struct dsa_switch_tree *dst) { + if (dst->tag_ops) + dsa_tag_driver_put(dst->tag_ops); list_del(&dst->list); kfree(dst); } @@ -97,21 +295,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst) kref_put(&dst->refcount, dsa_tree_release); } -static bool dsa_port_is_dsa(struct dsa_port *port) -{ - return port->type == DSA_PORT_TYPE_DSA; -} - -static bool dsa_port_is_cpu(struct dsa_port *port) -{ - return port->type == DSA_PORT_TYPE_CPU; -} - -static bool dsa_port_is_user(struct dsa_port *dp) -{ - return dp->type == DSA_PORT_TYPE_USER; -} - static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, struct device_node *dn) { @@ -204,6 +387,23 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) +{ + struct device_node *ethernet; + struct net_device *master; + struct dsa_port *cpu_dp; + + cpu_dp = dsa_tree_find_first_cpu(dst); + ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); + master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); + + return master; +} + +/* Assign the default CPU port (the first one in the tree) to all ports of the + * fabric which don't already have one as part of their own switch. + */ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp, *dp; @@ -214,15 +414,45 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return -EINVAL; } - /* Assign the default CPU port to all ports of the fabric */ - list_for_each_entry(dp, &dst->ports, list) + list_for_each_entry(dp, &dst->ports, list) { + if (dp->cpu_dp) + continue; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) dp->cpu_dp = cpu_dp; + } return 0; } -static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) +/* Perform initial assignment of CPU ports to user ports and DSA links in the + * fabric, giving preference to CPU ports local to each switch. Default to + * using the first CPU port in the switch tree if the port does not have a CPU + * port local to this switch. + */ +static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp, *dp; + + list_for_each_entry(cpu_dp, &dst->ports, list) { + if (!dsa_port_is_cpu(cpu_dp)) + continue; + + /* Prefer a local CPU port */ + dsa_switch_for_each_port(dp, cpu_dp->ds) { + /* Prefer the first local CPU port found */ + if (dp->cpu_dp) + continue; + + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } + } + + return dsa_tree_setup_default_cpu(dst); +} + +static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -231,39 +461,102 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) dp->cpu_dp = NULL; } -static int dsa_port_setup(struct dsa_port *dp) +static int dsa_port_devlink_setup(struct dsa_port *dp) { + struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch_tree *dst = dp->ds->dst; + struct devlink_port_attrs attrs = {}; + struct devlink *dl = dp->ds->devlink; struct dsa_switch *ds = dp->ds; - struct dsa_switch_tree *dst = ds->dst; - const unsigned char *id = (const unsigned char *)&dst->index; - const unsigned char len = sizeof(dst->index); + const unsigned char *id; + unsigned char len; + int err; + + memset(dlp, 0, sizeof(*dlp)); + devlink_port_init(dl, dlp); + + if (ds->ops->port_setup) { + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + + id = (const unsigned char *)&dst->index; + len = sizeof(dst->index); + + attrs.phys.port_number = dp->index; + memcpy(attrs.switch_id.id, id, len); + attrs.switch_id.id_len = len; + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED; + break; + case DSA_PORT_TYPE_CPU: + attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU; + break; + case DSA_PORT_TYPE_DSA: + attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA; + break; + case DSA_PORT_TYPE_USER: + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + break; + } + + devlink_port_attrs_set(dlp, &attrs); + err = devlink_port_register(dl, dlp, dp->index); + if (err) { + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + return err; + } + + return 0; +} + +static void dsa_port_devlink_teardown(struct dsa_port *dp) +{ + struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch *ds = dp->ds; + + devlink_port_unregister(dlp); + + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + + devlink_port_fini(dlp); +} + +static int dsa_port_setup(struct dsa_port *dp) +{ struct devlink_port *dlp = &dp->devlink_port; bool dsa_port_link_registered = false; - bool devlink_port_registered = false; - struct devlink *dl = ds->devlink; + struct dsa_switch *ds = dp->ds; bool dsa_port_enabled = false; int err = 0; if (dp->setup) return 0; + err = dsa_port_devlink_setup(dp); + if (err) + return err; + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: dsa_port_disable(dp); break; case DSA_PORT_TYPE_CPU: - memset(dlp, 0, sizeof(*dlp)); - devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU, - dp->index, false, 0, id, len); - err = devlink_port_register(dl, dlp, dp->index); - if (err) - break; - devlink_port_registered = true; - - err = dsa_port_link_register_of(dp); - if (err) - break; - dsa_port_link_registered = true; + if (dp->dn) { + err = dsa_shared_port_link_register_of(dp); + if (err) + break; + dsa_port_link_registered = true; + } else { + dev_warn(ds->dev, + "skipping link registration for CPU port %d\n", + dp->index); + } err = dsa_port_enable(dp, NULL); if (err) @@ -272,18 +565,16 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_DSA: - memset(dlp, 0, sizeof(*dlp)); - devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA, - dp->index, false, 0, id, len); - err = devlink_port_register(dl, dlp, dp->index); - if (err) - break; - devlink_port_registered = true; - - err = dsa_port_link_register_of(dp); - if (err) - break; - dsa_port_link_registered = true; + if (dp->dn) { + err = dsa_shared_port_link_register_of(dp); + if (err) + break; + dsa_port_link_registered = true; + } else { + dev_warn(ds->dev, + "skipping link registration for DSA port %d\n", + dp->index); + } err = dsa_port_enable(dp, NULL); if (err) @@ -292,15 +583,7 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_USER: - memset(dlp, 0, sizeof(*dlp)); - devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL, - dp->index, false, 0, id, len); - err = devlink_port_register(dl, dlp, dp->index); - if (err) - break; - devlink_port_registered = true; - - dp->mac = of_get_mac_address(dp->dn); + of_get_mac_address(dp->dn, dp->mac); err = dsa_slave_create(dp); if (err) break; @@ -312,11 +595,11 @@ static int dsa_port_setup(struct dsa_port *dp) if (err && dsa_port_enabled) dsa_port_disable(dp); if (err && dsa_port_link_registered) - dsa_port_link_unregister_of(dp); - if (err && devlink_port_registered) - devlink_port_unregister(dlp); - if (err) + dsa_shared_port_link_unregister_of(dp); + if (err) { + dsa_port_devlink_teardown(dp); return err; + } dp->setup = true; @@ -330,22 +613,22 @@ static void dsa_port_teardown(struct dsa_port *dp) if (!dp->setup) return; + devlink_port_type_clear(dlp); + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: dsa_port_disable(dp); - dsa_tag_driver_put(dp->tag_ops); - devlink_port_unregister(dlp); - dsa_port_link_unregister_of(dp); + if (dp->dn) + dsa_shared_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_DSA: dsa_port_disable(dp); - devlink_port_unregister(dlp); - dsa_port_link_unregister_of(dp); + if (dp->dn) + dsa_shared_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - devlink_port_unregister(dlp); if (dp->slave) { dsa_slave_destroy(dp->slave); dp->slave = NULL; @@ -353,12 +636,238 @@ static void dsa_port_teardown(struct dsa_port *dp) break; } + dsa_port_devlink_teardown(dp); + dp->setup = false; } +static int dsa_port_setup_as_unused(struct dsa_port *dp) +{ + dp->type = DSA_PORT_TYPE_UNUSED; + return dsa_port_setup(dp); +} + +static int dsa_devlink_info_get(struct devlink *dl, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (ds->ops->devlink_info_get) + return ds->ops->devlink_info_get(ds, req, extack); + + return -EOPNOTSUPP; +} + +static int dsa_devlink_sb_pool_get(struct devlink *dl, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index, + pool_info); +} + +static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_pool_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size, + threshold_type, extack); +} + +static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_port_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index, + pool_index, p_threshold); +} + +static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_port_pool_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index, + pool_index, threshold, extack); +} + +static int +dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_tc_pool_bind_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int +dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_tc_pool_bind_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index, + tc_index, pool_type, + pool_index, threshold, + extack); +} + +static int dsa_devlink_sb_occ_snapshot(struct devlink *dl, + unsigned int sb_index) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_occ_snapshot) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_snapshot(ds, sb_index); +} + +static int dsa_devlink_sb_occ_max_clear(struct devlink *dl, + unsigned int sb_index) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_occ_max_clear) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_max_clear(ds, sb_index); +} + +static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, + u16 pool_index, u32 *p_cur, + u32 *p_max) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_occ_port_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index, + pool_index, p_cur, p_max); +} + +static int +dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_occ_tc_port_bind_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port, + sb_index, tc_index, + pool_type, p_cur, + p_max); +} + +static const struct devlink_ops dsa_devlink_ops = { + .info_get = dsa_devlink_info_get, + .sb_pool_get = dsa_devlink_sb_pool_get, + .sb_pool_set = dsa_devlink_sb_pool_set, + .sb_port_pool_get = dsa_devlink_sb_port_pool_get, + .sb_port_pool_set = dsa_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot, + .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get, +}; + +static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) +{ + const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; + struct dsa_switch_tree *dst = ds->dst; + int err; + + if (tag_ops->proto == dst->default_proto) + goto connect; + + rtnl_lock(); + err = ds->ops->change_tag_protocol(ds, tag_ops->proto); + rtnl_unlock(); + if (err) { + dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", + tag_ops->name, ERR_PTR(err)); + return err; + } + +connect: + if (tag_ops->connect) { + err = tag_ops->connect(ds); + if (err) + return err; + } + + if (ds->ops->connect_tag_protocol) { + err = ds->ops->connect_tag_protocol(ds, tag_ops->proto); + if (err) { + dev_err(ds->dev, + "Unable to connect to tag protocol \"%s\": %pe\n", + tag_ops->name, ERR_PTR(err)); + goto disconnect; + } + } + + return 0; + +disconnect: + if (tag_ops->disconnect) + tag_ops->disconnect(ds); + + return err; +} + static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_devlink_priv *dl_priv; + struct device_node *dn; int err; if (ds->setup) @@ -374,52 +883,59 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables */ - ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv)); + ds->devlink = + devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); if (!ds->devlink) return -ENOMEM; dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink, ds->dev); - if (err) - goto free_devlink; - err = dsa_switch_register_notifier(ds); if (err) - goto unregister_devlink; + goto devlink_free; + + ds->configure_vlan_while_not_filtering = true; err = ds->ops->setup(ds); if (err < 0) goto unregister_notifier; - devlink_params_publish(ds->devlink); + err = dsa_switch_setup_tag_protocol(ds); + if (err) + goto teardown; if (!ds->slave_mii_bus && ds->ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) { err = -ENOMEM; - goto unregister_notifier; + goto teardown; } dsa_slave_mii_bus_init(ds); - err = mdiobus_register(ds->slave_mii_bus); + dn = of_get_child_by_name(ds->dev->of_node, "mdio"); + + err = of_mdiobus_register(ds->slave_mii_bus, dn); + of_node_put(dn); if (err < 0) - goto unregister_notifier; + goto free_slave_mii_bus; } ds->setup = true; - + devlink_register(ds->devlink); return 0; +free_slave_mii_bus: + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_free(ds->slave_mii_bus); +teardown: + if (ds->ops->teardown) + ds->ops->teardown(ds); unregister_notifier: dsa_switch_unregister_notifier(ds); -unregister_devlink: - devlink_unregister(ds->devlink); -free_devlink: +devlink_free: devlink_free(ds->devlink); ds->devlink = NULL; - return err; } @@ -428,16 +944,21 @@ static void dsa_switch_teardown(struct dsa_switch *ds) if (!ds->setup) return; - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_unregister(ds->slave_mii_bus); + if (ds->devlink) + devlink_unregister(ds->devlink); - dsa_switch_unregister_notifier(ds); + if (ds->slave_mii_bus && ds->ops->phy_read) { + mdiobus_unregister(ds->slave_mii_bus); + mdiobus_free(ds->slave_mii_bus); + ds->slave_mii_bus = NULL; + } if (ds->ops->teardown) ds->ops->teardown(ds); + dsa_switch_unregister_notifier(ds); + if (ds->devlink) { - devlink_unregister(ds->devlink); devlink_free(ds->devlink); ds->devlink = NULL; } @@ -445,69 +966,154 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->setup = false; } -static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) +/* First tear down the non-shared, then the shared ports. This ensures that + * all work items scheduled by our switchdev handlers for user ports have + * completed before we destroy the refcounting kept on the shared ports. + */ +static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; - int err; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) + dsa_port_teardown(dp); + + dsa_flush_workqueue(); + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) + dsa_port_teardown(dp); +} + +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + dsa_switch_teardown(dp->ds); +} + +/* Bring shared ports up first, then non-shared ports */ +static int dsa_tree_setup_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + int err = 0; list_for_each_entry(dp, &dst->ports, list) { - err = dsa_switch_setup(dp->ds); - if (err) - goto teardown; + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) { + err = dsa_port_setup(dp); + if (err) + goto teardown; + } } list_for_each_entry(dp, &dst->ports, list) { - err = dsa_port_setup(dp); - if (err) - goto teardown; + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) { + err = dsa_port_setup(dp); + if (err) { + err = dsa_port_setup_as_unused(dp); + if (err) + goto teardown; + } + } } return 0; teardown: - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); - - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); + dsa_tree_teardown_ports(dst); return err; } -static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) +static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) { struct dsa_port *dp; + int err = 0; - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); + list_for_each_entry(dp, &dst->ports, list) { + err = dsa_switch_setup(dp->ds); + if (err) { + dsa_tree_teardown_switches(dst); + break; + } + } - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); + return err; } static int dsa_tree_setup_master(struct dsa_switch_tree *dst) { - struct dsa_port *dp; - int err; + struct dsa_port *cpu_dp; + int err = 0; - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_cpu(dp)) { - err = dsa_master_setup(dp->master, dp); - if (err) - return err; - } + rtnl_lock(); + + dsa_tree_for_each_cpu_port(cpu_dp, dst) { + struct net_device *master = cpu_dp->master; + bool admin_up = (master->flags & IFF_UP) && + !qdisc_tx_is_noop(master); + + err = dsa_master_setup(master, cpu_dp); + if (err) + break; + + /* Replay master state event */ + dsa_tree_master_admin_state_change(dst, master, admin_up); + dsa_tree_master_oper_state_change(dst, master, + netif_oper_up(master)); } - return 0; + rtnl_unlock(); + + return err; } static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) { + struct dsa_port *cpu_dp; + + rtnl_lock(); + + dsa_tree_for_each_cpu_port(cpu_dp, dst) { + struct net_device *master = cpu_dp->master; + + /* Synthesizing an "admin down" state is sufficient for + * the switches to get a notification if the master is + * currently up and running. + */ + dsa_tree_master_admin_state_change(dst, master, false); + + dsa_master_teardown(master); + } + + rtnl_unlock(); +} + +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) +{ + unsigned int len = 0; struct dsa_port *dp; - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_is_cpu(dp)) - dsa_master_teardown(dp->master); + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->num_lag_ids > len) + len = dp->ds->num_lag_ids; + } + + if (!len) + return 0; + + dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL); + if (!dst->lags) + return -ENOMEM; + + dst->lags_len = len; + return 0; +} + +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) +{ + kfree(dst->lags); } static int dsa_tree_setup(struct dsa_switch_tree *dst) @@ -525,28 +1131,40 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (!complete) return 0; - err = dsa_tree_setup_default_cpu(dst); + err = dsa_tree_setup_cpu_ports(dst); if (err) return err; err = dsa_tree_setup_switches(dst); if (err) - goto teardown_default_cpu; + goto teardown_cpu_ports; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_ports(dst); if (err) goto teardown_switches; + err = dsa_tree_setup_master(dst); + if (err) + goto teardown_ports; + + err = dsa_tree_setup_lags(dst); + if (err) + goto teardown_master; + dst->setup = true; pr_info("DSA: tree %d setup\n", dst->index); return 0; +teardown_master: + dsa_tree_teardown_master(dst); +teardown_ports: + dsa_tree_teardown_ports(dst); teardown_switches: dsa_tree_teardown_switches(dst); -teardown_default_cpu: - dsa_tree_teardown_default_cpu(dst); +teardown_cpu_ports: + dsa_tree_teardown_cpu_ports(dst); return err; } @@ -558,11 +1176,15 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) if (!dst->setup) return; + dsa_tree_teardown_lags(dst); + dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); - dsa_tree_teardown_default_cpu(dst); + dsa_tree_teardown_cpu_ports(dst); list_for_each_entry_safe(dl, next, &dst->rtable, list) { list_del(&dl->list); @@ -574,13 +1196,152 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dst->setup = false; } +static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst, + const struct dsa_device_ops *tag_ops) +{ + const struct dsa_device_ops *old_tag_ops = dst->tag_ops; + struct dsa_notifier_tag_proto_info info; + int err; + + dst->tag_ops = tag_ops; + + /* Notify the switches from this tree about the connection + * to the new tagger + */ + info.tag_ops = tag_ops; + err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info); + if (err && err != -EOPNOTSUPP) + goto out_disconnect; + + /* Notify the old tagger about the disconnection from this tree */ + info.tag_ops = old_tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info); + + return 0; + +out_disconnect: + info.tag_ops = tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info); + dst->tag_ops = old_tag_ops; + + return err; +} + +/* Since the dsa/tagging sysfs device attribute is per master, the assumption + * is that all DSA switches within a tree share the same tagger, otherwise + * they would have formed disjoint trees (different "dsa,member" values). + */ +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, + const struct dsa_device_ops *tag_ops, + const struct dsa_device_ops *old_tag_ops) +{ + struct dsa_notifier_tag_proto_info info; + struct dsa_port *dp; + int err = -EBUSY; + + if (!rtnl_trylock()) + return restart_syscall(); + + /* At the moment we don't allow changing the tag protocol under + * traffic. The rtnl_mutex also happens to serialize concurrent + * attempts to change the tagging protocol. If we ever lift the IFF_UP + * restriction, there needs to be another mutex which serializes this. + */ + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp)->flags & IFF_UP) + goto out_unlock; + + if (dp->slave->flags & IFF_UP) + goto out_unlock; + } + + /* Notify the tag protocol change */ + info.tag_ops = tag_ops; + err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); + if (err) + goto out_unwind_tagger; + + err = dsa_tree_bind_tag_proto(dst, tag_ops); + if (err) + goto out_unwind_tagger; + + rtnl_unlock(); + + return 0; + +out_unwind_tagger: + info.tag_ops = old_tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); +out_unlock: + rtnl_unlock(); + return err; +} + +static void dsa_tree_master_state_change(struct dsa_switch_tree *dst, + struct net_device *master) +{ + struct dsa_notifier_master_state_info info; + struct dsa_port *cpu_dp = master->dsa_ptr; + + info.master = master; + info.operational = dsa_port_master_is_operational(cpu_dp); + + dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info); +} + +void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + bool notify = false; + + /* Don't keep track of admin state on LAG DSA masters, + * but rather just of physical DSA masters + */ + if (netif_is_lag_master(master)) + return; + + if ((dsa_port_master_is_operational(cpu_dp)) != + (up && cpu_dp->master_oper_up)) + notify = true; + + cpu_dp->master_admin_up = up; + + if (notify) + dsa_tree_master_state_change(dst, master); +} + +void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + bool notify = false; + + /* Don't keep track of oper state on LAG DSA masters, + * but rather just of physical DSA masters + */ + if (netif_is_lag_master(master)) + return; + + if ((dsa_port_master_is_operational(cpu_dp)) != + (cpu_dp->master_admin_up && up)) + notify = true; + + cpu_dp->master_oper_up = up; + + if (notify) + dsa_tree_master_state_change(dst, master); +} + static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) { struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp; - list_for_each_entry(dp, &dst->ports, list) - if (dp->ds == ds && dp->index == index) + dsa_switch_for_each_port(dp, ds) + if (dp->index == index) return dp; dp = kzalloc(sizeof(*dp), GFP_KERNEL); @@ -590,6 +1351,11 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) dp->ds = ds; dp->index = index; + mutex_init(&dp->addr_lists_lock); + mutex_init(&dp->vlans_lock); + INIT_LIST_HEAD(&dp->fdbs); + INIT_LIST_HEAD(&dp->mdbs); + INIT_LIST_HEAD(&dp->vlans); INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); @@ -640,30 +1406,88 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol); } -static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, + const char *user_protocol) { + const struct dsa_device_ops *tag_ops = NULL; struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; - const struct dsa_device_ops *tag_ops; - enum dsa_tag_protocol tag_protocol; + enum dsa_tag_protocol default_proto; + + /* Find out which protocol the switch would prefer. */ + default_proto = dsa_get_tag_protocol(dp, master); + if (dst->default_proto) { + if (dst->default_proto != default_proto) { + dev_err(ds->dev, + "A DSA switch tree can have only one tagging protocol\n"); + return -EINVAL; + } + } else { + dst->default_proto = default_proto; + } + + /* See if the user wants to override that preference. */ + if (user_protocol) { + if (!ds->ops->change_tag_protocol) { + dev_err(ds->dev, "Tag protocol cannot be modified\n"); + return -EINVAL; + } + + tag_ops = dsa_find_tagger_by_name(user_protocol); + if (IS_ERR(tag_ops)) { + dev_warn(ds->dev, + "Failed to find a tagging driver for protocol %s, using default\n", + user_protocol); + tag_ops = NULL; + } + } + + if (!tag_ops) + tag_ops = dsa_tag_driver_get(default_proto); - tag_protocol = dsa_get_tag_protocol(dp, master); - tag_ops = dsa_tag_driver_get(tag_protocol); if (IS_ERR(tag_ops)) { if (PTR_ERR(tag_ops) == -ENOPROTOOPT) return -EPROBE_DEFER; + dev_warn(ds->dev, "No tagger for this switch\n"); - dp->master = NULL; return PTR_ERR(tag_ops); } + if (dst->tag_ops) { + if (dst->tag_ops != tag_ops) { + dev_err(ds->dev, + "A DSA switch tree can have only one tagging protocol\n"); + + dsa_tag_driver_put(tag_ops); + return -EINVAL; + } + + /* In the case of multiple CPU ports per switch, the tagging + * protocol is still reference-counted only per switch tree. + */ + dsa_tag_driver_put(tag_ops); + } else { + dst->tag_ops = tag_ops; + } + dp->master = master; dp->type = DSA_PORT_TYPE_CPU; - dp->filter = tag_ops->filter; - dp->rcv = tag_ops->rcv; - dp->tag_ops = tag_ops; + dsa_port_set_tag_protocol(dp, dst->tag_ops); dp->dst = dst; + /* At this point, the tree may be configured to use a different + * tagger than the one chosen by the switch driver during + * .setup, in the case when a user selects a custom protocol + * through the DT. + * + * This is resolved by syncing the driver with the tree in + * dsa_switch_setup_tag_protocol once .setup has run and the + * driver is ready to accept calls to .change_tag_protocol. If + * the driver does not support the custom protocol at that + * point, the tree is wholly rejected, thereby ensuring that the + * tree and driver are always in agreement on the protocol to + * use. + */ return 0; } @@ -677,12 +1501,15 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) if (ethernet) { struct net_device *master; + const char *user_protocol; master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); if (!master) return -EPROBE_DEFER; - return dsa_port_parse_cpu(dp, master); + user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); + return dsa_port_parse_cpu(dp, master, user_protocol); } if (link) @@ -701,16 +1528,25 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds, ports = of_get_child_by_name(dn, "ports"); if (!ports) { - dev_err(ds->dev, "no ports child node found\n"); - return -EINVAL; + /* The second possibility is "ethernet-ports" */ + ports = of_get_child_by_name(dn, "ethernet-ports"); + if (!ports) { + dev_err(ds->dev, "no ports child node found\n"); + return -EINVAL; + } } for_each_available_child_of_node(ports, port) { err = of_property_read_u32(port, "reg", ®); - if (err) + if (err) { + of_node_put(port); goto out_put_node; + } if (reg >= ds->num_ports) { + dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n", + port, reg, ds->num_ports); + of_node_put(port); err = -EINVAL; goto out_put_node; } @@ -718,8 +1554,10 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds, dp = dsa_to_port(ds, reg); err = dsa_port_parse_of(dp, port); - if (err) + if (err) { + of_node_put(port); goto out_put_node; + } } out_put_node: @@ -744,6 +1582,16 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds, if (!ds->dst) return -ENOMEM; + if (dsa_switch_find(ds->dst->index, ds->index)) { + dev_err(ds->dev, + "A DSA switch with index %d already exists in tree %d\n", + ds->index, ds->dst->index); + return -EEXIST; + } + + if (ds->dst->last_switch < ds->index) + ds->dst->last_switch = ds->index; + return 0; } @@ -788,7 +1636,7 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, dev_put(master); - return dsa_port_parse_cpu(dp, master); + return dsa_port_parse_cpu(dp, master, NULL); } if (!strcmp(name, "dsa")) @@ -851,12 +1699,12 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { - struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp, *next; - list_for_each_entry_safe(dp, next, &dst->ports, list) { - if (dp->ds != ds) - continue; + dsa_switch_for_each_port_safe(dp, next, ds) { + WARN_ON(!list_empty(&dp->fdbs)); + WARN_ON(!list_empty(&dp->mdbs)); + WARN_ON(!list_empty(&dp->vlans)); list_del(&dp->list); kfree(dp); } @@ -933,3 +1781,39 @@ void dsa_unregister_switch(struct dsa_switch *ds) mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); + +/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is + * blocking that operation from completion, due to the dev_hold taken inside + * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of + * the DSA master, so that the system can reboot successfully. + */ +void dsa_switch_shutdown(struct dsa_switch *ds) +{ + struct net_device *master, *slave_dev; + struct dsa_port *dp; + + mutex_lock(&dsa2_mutex); + + if (!ds->setup) + goto out; + + rtnl_lock(); + + dsa_switch_for_each_user_port(dp, ds) { + master = dsa_port_to_master(dp); + slave_dev = dp->slave; + + netdev_upper_dev_unlink(master, slave_dev); + } + + /* Disconnect from further netdevice notifiers on the master, + * since netdev_uses_dsa() will now return false. + */ + dsa_switch_for_each_cpu_port(dp, ds) + dp->master->dsa_ptr = NULL; + + rtnl_unlock(); +out: + mutex_unlock(&dsa2_mutex); +} +EXPORT_SYMBOL_GPL(dsa_switch_shutdown); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 760e6ea3178a..6e65c7ffd6f3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -7,10 +7,15 @@ #ifndef __DSA_PRIV_H #define __DSA_PRIV_H +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> #include <linux/phy.h> #include <linux/netdevice.h> #include <linux/netpoll.h> #include <net/dsa.h> +#include <net/gro_cells.h> + +#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG enum { DSA_NOTIFIER_AGEING_TIME, @@ -18,47 +23,130 @@ enum { DSA_NOTIFIER_BRIDGE_LEAVE, DSA_NOTIFIER_FDB_ADD, DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_HOST_FDB_ADD, + DSA_NOTIFIER_HOST_FDB_DEL, + DSA_NOTIFIER_LAG_FDB_ADD, + DSA_NOTIFIER_LAG_FDB_DEL, + DSA_NOTIFIER_LAG_CHANGE, + DSA_NOTIFIER_LAG_JOIN, + DSA_NOTIFIER_LAG_LEAVE, DSA_NOTIFIER_MDB_ADD, DSA_NOTIFIER_MDB_DEL, + DSA_NOTIFIER_HOST_MDB_ADD, + DSA_NOTIFIER_HOST_MDB_DEL, DSA_NOTIFIER_VLAN_ADD, DSA_NOTIFIER_VLAN_DEL, + DSA_NOTIFIER_HOST_VLAN_ADD, + DSA_NOTIFIER_HOST_VLAN_DEL, + DSA_NOTIFIER_MTU, + DSA_NOTIFIER_TAG_PROTO, + DSA_NOTIFIER_TAG_PROTO_CONNECT, + DSA_NOTIFIER_TAG_PROTO_DISCONNECT, + DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, + DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, + DSA_NOTIFIER_MASTER_STATE_CHANGE, }; /* DSA_NOTIFIER_AGEING_TIME */ struct dsa_notifier_ageing_time_info { - struct switchdev_trans *trans; unsigned int ageing_time; }; /* DSA_NOTIFIER_BRIDGE_* */ struct dsa_notifier_bridge_info { - struct net_device *br; - int sw_index; - int port; + const struct dsa_port *dp; + struct dsa_bridge bridge; + bool tx_fwd_offload; + struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_FDB_* */ struct dsa_notifier_fdb_info { - int sw_index; - int port; + const struct dsa_port *dp; + const unsigned char *addr; + u16 vid; + struct dsa_db db; +}; + +/* DSA_NOTIFIER_LAG_FDB_* */ +struct dsa_notifier_lag_fdb_info { + struct dsa_lag *lag; const unsigned char *addr; u16 vid; + struct dsa_db db; }; /* DSA_NOTIFIER_MDB_* */ struct dsa_notifier_mdb_info { + const struct dsa_port *dp; const struct switchdev_obj_port_mdb *mdb; - struct switchdev_trans *trans; - int sw_index; - int port; + struct dsa_db db; +}; + +/* DSA_NOTIFIER_LAG_* */ +struct dsa_notifier_lag_info { + const struct dsa_port *dp; + struct dsa_lag lag; + struct netdev_lag_upper_info *info; + struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { + const struct dsa_port *dp; const struct switchdev_obj_port_vlan *vlan; - struct switchdev_trans *trans; - int sw_index; - int port; + struct netlink_ext_ack *extack; +}; + +/* DSA_NOTIFIER_MTU */ +struct dsa_notifier_mtu_info { + const struct dsa_port *dp; + int mtu; +}; + +/* DSA_NOTIFIER_TAG_PROTO_* */ +struct dsa_notifier_tag_proto_info { + const struct dsa_device_ops *tag_ops; +}; + +/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */ +struct dsa_notifier_tag_8021q_vlan_info { + const struct dsa_port *dp; + u16 vid; +}; + +/* DSA_NOTIFIER_MASTER_STATE_CHANGE */ +struct dsa_notifier_master_state_info { + const struct net_device *master; + bool operational; +}; + +struct dsa_switchdev_event_work { + struct net_device *dev; + struct net_device *orig_dev; + struct work_struct work; + unsigned long event; + /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and + * SWITCHDEV_FDB_DEL_TO_DEVICE + */ + unsigned char addr[ETH_ALEN]; + u16 vid; + bool host_addr; +}; + +enum dsa_standalone_event { + DSA_UC_ADD, + DSA_UC_DEL, + DSA_MC_ADD, + DSA_MC_DEL, +}; + +struct dsa_standalone_event_work { + struct work_struct work; + struct net_device *dev; + enum dsa_standalone_event event; + unsigned char addr[ETH_ALEN]; + u16 vid; }; struct dsa_slave_priv { @@ -66,7 +154,7 @@ struct dsa_slave_priv { struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); - struct pcpu_sw_netstats *stats64; + struct gro_cells gcells; /* DSA port data, such as switch, port index, etc. */ struct dsa_port *dp; @@ -82,22 +170,26 @@ struct dsa_slave_priv { /* dsa.c */ const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol); void dsa_tag_driver_put(const struct dsa_device_ops *ops); +const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf); + +bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b); bool dsa_schedule_work(struct work_struct *work); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); -int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 flags, - struct netlink_ext_ack *extack); -int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid); +static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) +{ + return ops->needed_headroom + ops->needed_tailroom; +} /* master.c */ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); void dsa_master_teardown(struct net_device *dev); +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp); static inline struct net_device *dsa_master_find_slave(struct net_device *dev, int device, int port) @@ -114,56 +206,125 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, return NULL; } +/* netlink.c */ +extern struct rtnl_link_ops dsa_link_ops __read_mostly; + /* port.c */ -int dsa_port_set_state(struct dsa_port *dp, u8 state, - struct switchdev_trans *trans); +void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, + const struct dsa_device_ops *tag_ops); +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); +int dsa_port_set_mst_state(struct dsa_port *dp, + const struct switchdev_mst_state *state, + struct netlink_ext_ack *extack); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); -int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, + struct netlink_ext_ack *extack); +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo); +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans); -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans); + struct netlink_ext_ack *extack); +bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); +int dsa_port_mst_enable(struct dsa_port *dp, bool on, + struct netlink_ext_ack *extack); +int dsa_port_vlan_msti(struct dsa_port *dp, + const struct switchdev_vlan_msti *msti); +int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); +int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid); +int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid); +int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + const struct switchdev_obj_port_mdb *mdb); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans); -int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct switchdev_trans *trans); +int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); +int dsa_port_bridge_flags(struct dsa_port *dp, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + struct netlink_ext_ack *extack); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); -int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags); -int dsa_port_vid_del(struct dsa_port *dp, u16 vid); -int dsa_port_link_register_of(struct dsa_port *dp); -void dsa_port_link_unregister_of(struct dsa_port *dp); -extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; +int dsa_port_host_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); +int dsa_port_host_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan); +int dsa_port_mrp_add(const struct dsa_port *dp, + const struct switchdev_obj_mrp *mrp); +int dsa_port_mrp_del(const struct dsa_port *dp, + const struct switchdev_obj_mrp *mrp); +int dsa_port_mrp_add_ring_role(const struct dsa_port *dp, + const struct switchdev_obj_ring_role_mrp *mrp); +int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, + const struct switchdev_obj_ring_role_mrp *mrp); +int dsa_port_phylink_create(struct dsa_port *dp); +void dsa_port_phylink_destroy(struct dsa_port *dp); +int dsa_shared_port_link_register_of(struct dsa_port *dp); +void dsa_shared_port_link_unregister_of(struct dsa_port *dp); +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); +void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); +void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; +extern struct notifier_block dsa_slave_switchdev_notifier; +extern struct notifier_block dsa_slave_switchdev_blocking_notifier; + void dsa_slave_mii_bus_init(struct dsa_switch *ds); int dsa_slave_create(struct dsa_port *dp); void dsa_slave_destroy(struct net_device *slave_dev); -bool dsa_slave_dev_check(const struct net_device *dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +void dsa_slave_sync_ha(struct net_device *dev); +void dsa_slave_unsync_ha(struct net_device *dev); +void dsa_slave_setup_tagger(struct net_device *slave); +int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack); +int dsa_slave_manage_vlan_filtering(struct net_device *dev, + bool vlan_filtering); static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) { @@ -177,10 +338,250 @@ dsa_slave_to_master(const struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - return dp->cpu_dp->master; + return dsa_port_to_master(dp); +} + +/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged + * frames as untagged, since the bridge will not untag them. + */ +static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct net_device *dev = skb->dev; + struct net_device *upper_dev; + u16 vid, pvid, proto; + int err; + + if (!br || br_vlan_enabled(br)) + return skb; + + err = br_vlan_get_proto(br, &proto); + if (err) + return skb; + + /* Move VLAN tag from data to hwaccel */ + if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { + skb = skb_vlan_untag(skb); + if (!skb) + return NULL; + } + + if (!skb_vlan_tag_present(skb)) + return skb; + + vid = skb_vlan_tag_get_id(skb); + + /* We already run under an RCU read-side critical section since + * we are called from netif_receive_skb_list_internal(). + */ + err = br_vlan_get_pvid_rcu(dev, &pvid); + if (err) + return skb; + + if (vid != pvid) + return skb; + + /* The sad part about attempting to untag from DSA is that we + * don't know, unless we check, if the skb will end up in + * the bridge's data path - br_allowed_ingress() - or not. + * For example, there might be an 8021q upper for the + * default_pvid of the bridge, which will steal VLAN-tagged traffic + * from the bridge's data path. This is a configuration that DSA + * supports because vlan_filtering is 0. In that case, we should + * definitely keep the tag, to make sure it keeps working. + */ + upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); + if (upper_dev) + return skb; + + __vlan_hwaccel_clear_tag(skb); + + return skb; +} + +/* For switches without hardware support for DSA tagging to be able + * to support termination through the bridge. + */ +static inline struct net_device * +dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct bridge_vlan_info vinfo; + struct net_device *slave; + struct dsa_port *dp; + int err; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->type != DSA_PORT_TYPE_USER) + continue; + + if (!dp->bridge) + continue; + + if (dp->stp_state != BR_STATE_LEARNING && + dp->stp_state != BR_STATE_FORWARDING) + continue; + + /* Since the bridge might learn this packet, keep the CPU port + * affinity with the port that will be used for the reply on + * xmit. + */ + if (dp->cpu_dp != cpu_dp) + continue; + + slave = dp->slave; + + err = br_vlan_get_info_rcu(slave, vid, &vinfo); + if (err) + continue; + + return slave; + } + + return NULL; +} + +/* If the ingress port offloads the bridge, we mark the frame as autonomously + * forwarded by hardware, so the software bridge doesn't forward in twice, back + * to us, because we already did. However, if we're in fallback mode and we do + * software bridging, we are not offloading it, therefore the dp->bridge + * pointer is not populated, and flooding needs to be done by software (we are + * effectively operating in standalone ports mode). + */ +static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + + skb->offload_fwd_mark = !!(dp->bridge); +} + +/* Helper for removing DSA header tags from packets in the RX path. + * Must not be called before skb_pull(len). + * skb->data + * | + * v + * | | | | | | | | | | | | | | | | | | | + * +-----------------------+-----------------------+---------------+-------+ + * | Destination MAC | Source MAC | DSA header | EType | + * +-----------------------+-----------------------+---------------+-------+ + * | | + * <----- len -----> <----- len -----> + * | + * >>>>>>> v + * >>>>>>> | | | | | | | | | | | | | | | + * >>>>>>> +-----------------------+-----------------------+-------+ + * >>>>>>> | Destination MAC | Source MAC | EType | + * +-----------------------+-----------------------+-------+ + * ^ + * | + * skb->data + */ +static inline void dsa_strip_etype_header(struct sk_buff *skb, int len) +{ + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN); +} + +/* Helper for creating space for DSA header tags in TX path packets. + * Must not be called before skb_push(len). + * + * Before: + * + * <<<<<<< | | | | | | | | | | | | | | | + * ^ <<<<<<< +-----------------------+-----------------------+-------+ + * | <<<<<<< | Destination MAC | Source MAC | EType | + * | +-----------------------+-----------------------+-------+ + * <----- len -----> + * | + * | + * skb->data + * + * After: + * + * | | | | | | | | | | | | | | | | | | | + * +-----------------------+-----------------------+---------------+-------+ + * | Destination MAC | Source MAC | DSA header | EType | + * +-----------------------+-----------------------+---------------+-------+ + * ^ | | + * | <----- len -----> + * skb->data + */ +static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len) +{ + memmove(skb->data, skb->data + len, 2 * ETH_ALEN); +} + +/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from + * skb_mac_header(skb), which leaves skb->data pointing at the first byte after + * what the DSA master perceives as the EtherType (the beginning of the L3 + * protocol). Since DSA EtherType header taggers treat the EtherType as part of + * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header + * is located 2 bytes behind skb->data. Note that EtherType in this context + * means the first 2 bytes of the DSA header, not the encapsulated EtherType + * that will become visible after the DSA header is stripped. + */ +static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb) +{ + return skb->data - 2; +} + +/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType + * header taggers start exactly where the EtherType is (the EtherType is + * treated as part of the DSA header). + */ +static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb) +{ + return skb->data + 2 * ETH_ALEN; } /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); + +static inline bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_fdb_add && ds->ops->port_fdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} + +static inline bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_mdb_add && ds->ops->port_mdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} + +/* dsa2.c */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); +struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, + const struct net_device *lag_dev); +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); +int dsa_broadcast(unsigned long e, void *v); +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, + const struct dsa_device_ops *tag_ops, + const struct dsa_device_ops *old_tag_ops); +void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up); +void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up); +unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); +void dsa_bridge_num_put(const struct net_device *bridge_dev, + unsigned int bridge_num); +struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, + const struct net_device *br); + +/* tag_8021q.c */ +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); + +extern struct list_head dsa_tree_list; + #endif diff --git a/net/dsa/master.c b/net/dsa/master.c index bd44bde272f4..40367ab41cf8 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -58,7 +58,7 @@ static void dsa_master_get_regs(struct net_device *dev, } cpu_info = (struct ethtool_drvinfo *)data; - strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver)); + strscpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver)); data += sizeof(*cpu_info); cpu_regs = (struct ethtool_regs *)data; data += sizeof(*cpu_regs); @@ -147,8 +147,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; int len = ETH_GSTRING_LEN; - int mcount = 0, count; - unsigned int i; + int mcount = 0, count, i; uint8_t pfx[4]; uint8_t *ndata; @@ -178,6 +177,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, */ ds->ops->get_strings(ds, port, stringset, ndata); count = ds->ops->get_sset_count(ds, port, stringset); + if (count < 0) + return; for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -186,17 +187,6 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } -static int dsa_master_get_phys_port_name(struct net_device *dev, - char *name, size_t len) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - - if (snprintf(name, len, "p%d", cpu_dp->index) >= len) - return -EINVAL; - - return 0; -} - static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -220,18 +210,25 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - if (cpu_dp->orig_ndo_ops && cpu_dp->orig_ndo_ops->ndo_do_ioctl) - err = cpu_dp->orig_ndo_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_eth_ioctl) + err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd); return err; } +static const struct dsa_netdevice_ops dsa_netdev_ops = { + .ndo_eth_ioctl = dsa_master_ioctl, +}; + static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; struct ethtool_ops *ops; + if (netif_is_lag_master(dev)) + return 0; + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); if (!ops) return -ENOMEM; @@ -256,53 +253,86 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; + if (netif_is_lag_master(dev)) + return; + dev->ethtool_ops = cpu_dp->orig_ethtool_ops; cpu_dp->orig_ethtool_ops = NULL; } -static int dsa_master_ndo_setup(struct net_device *dev) +static void dsa_netdev_ops_set(struct net_device *dev, + const struct dsa_netdevice_ops *ops) { - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct dsa_switch *ds = cpu_dp->ds; - struct net_device_ops *ops; + if (netif_is_lag_master(dev)) + return; - if (dev->netdev_ops->ndo_get_phys_port_name) - return 0; - - ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); - if (!ops) - return -ENOMEM; + dev->dsa_ptr->netdev_ops = ops; +} - cpu_dp->orig_ndo_ops = dev->netdev_ops; - if (cpu_dp->orig_ndo_ops) - memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops)); +/* Keep the master always promiscuous if the tagging protocol requires that + * (garbles MAC DA) or if it doesn't support unicast filtering, case in which + * it would revert to promiscuous mode as soon as we call dev_uc_add() on it + * anyway. + */ +static void dsa_master_set_promiscuity(struct net_device *dev, int inc) +{ + const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops; - ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name; - ops->ndo_do_ioctl = dsa_master_ioctl; + if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master) + return; - dev->netdev_ops = ops; + ASSERT_RTNL(); - return 0; + dev_set_promiscuity(dev, inc); } -static void dsa_master_ndo_teardown(struct net_device *dev) +static ssize_t tagging_show(struct device *d, struct device_attribute *attr, + char *buf) { + struct net_device *dev = to_net_dev(d); struct dsa_port *cpu_dp = dev->dsa_ptr; - dev->netdev_ops = cpu_dp->orig_ndo_ops; - cpu_dp->orig_ndo_ops = NULL; + return sprintf(buf, "%s\n", + dsa_tag_protocol_to_str(cpu_dp->tag_ops)); } -static ssize_t tagging_show(struct device *d, struct device_attribute *attr, - char *buf) +static ssize_t tagging_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) { + const struct dsa_device_ops *new_tag_ops, *old_tag_ops; struct net_device *dev = to_net_dev(d); struct dsa_port *cpu_dp = dev->dsa_ptr; + int err; - return sprintf(buf, "%s\n", - dsa_tag_protocol_to_str(cpu_dp->tag_ops)); + old_tag_ops = cpu_dp->tag_ops; + new_tag_ops = dsa_find_tagger_by_name(buf); + /* Bad tagger name, or module is not loaded? */ + if (IS_ERR(new_tag_ops)) + return PTR_ERR(new_tag_ops); + + if (new_tag_ops == old_tag_ops) + /* Drop the temporarily held duplicate reference, since + * the DSA switch tree uses this tagger. + */ + goto out; + + err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops, + old_tag_ops); + if (err) { + /* On failure the old tagger is restored, so we don't need the + * driver for the new one. + */ + dsa_tag_driver_put(new_tag_ops); + return err; + } + + /* On success we no longer need the module for the old tagging protocol + */ +out: + dsa_tag_driver_put(old_tag_ops); + return count; } -static DEVICE_ATTR_RO(tagging); +static DEVICE_ATTR_RW(tagging); static struct attribute *dsa_slave_attrs[] = { &dev_attr_tagging.attr, @@ -314,37 +344,43 @@ static const struct attribute_group dsa_group = { .attrs = dsa_slave_attrs, }; -static void dsa_master_set_mtu(struct net_device *dev, struct dsa_port *cpu_dp) -{ - unsigned int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead; - int err; - - rtnl_lock(); - if (mtu <= dev->max_mtu) { - err = dev_set_mtu(dev, mtu); - if (err) - netdev_dbg(dev, "Unable to set MTU to include for DSA overheads\n"); - } - rtnl_unlock(); -} - static void dsa_master_reset_mtu(struct net_device *dev) { int err; - rtnl_lock(); err = dev_set_mtu(dev, ETH_DATA_LEN); if (err) netdev_dbg(dev, "Unable to reset MTU to exclude DSA overheads\n"); - rtnl_unlock(); } int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { - int ret; + const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; + struct dsa_switch *ds = cpu_dp->ds; + struct device_link *consumer_link; + int mtu, ret; + + mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); + + /* The DSA master must use SET_NETDEV_DEV for this to work. */ + if (!netif_is_lag_master(dev)) { + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + } - dsa_master_set_mtu(dev, cpu_dp); + /* The switch driver may not implement ->port_change_mtu(), case in + * which dsa_slave_change_mtu() will not update the master MTU either, + * so we need to do that here. + */ + ret = dev_set_mtu(dev, mtu); + if (ret) + netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", + ret, mtu); /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -353,13 +389,14 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + + dsa_master_set_promiscuity(dev, 1); + ret = dsa_master_ethtool_setup(dev); if (ret) - return ret; + goto out_err_reset_promisc; - ret = dsa_master_ndo_setup(dev); - if (ret) - goto out_err_ethtool_teardown; + dsa_netdev_ops_set(dev, &dsa_netdev_ops); ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); if (ret) @@ -368,18 +405,20 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) return ret; out_err_ndo_teardown: - dsa_master_ndo_teardown(dev); -out_err_ethtool_teardown: + dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); +out_err_reset_promisc: + dsa_master_set_promiscuity(dev, -1); return ret; } void dsa_master_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); - dsa_master_ndo_teardown(dev); + dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); dsa_master_reset_mtu(dev); + dsa_master_set_promiscuity(dev, -1); dev->dsa_ptr = NULL; @@ -389,3 +428,52 @@ void dsa_master_teardown(struct net_device *dev) */ wmb(); } + +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + bool master_setup = false; + int err; + + if (!netdev_uses_dsa(lag_dev)) { + err = dsa_master_setup(lag_dev, cpu_dp); + if (err) + return err; + + master_setup = true; + } + + err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack); + if (err) { + if (extack && !extack->_msg) + NL_SET_ERR_MSG_MOD(extack, + "CPU port failed to join LAG"); + goto out_master_teardown; + } + + return 0; + +out_master_teardown: + if (master_setup) + dsa_master_teardown(lag_dev); + return err; +} + +/* Tear down a master if there isn't any other user port on it, + * optionally also destroying LAG information. + */ +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp) +{ + struct net_device *upper; + struct list_head *iter; + + dsa_port_lag_leave(cpu_dp, lag_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper, iter) + if (dsa_slave_dev_check(upper)) + return; + + dsa_master_teardown(lag_dev); +} diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c new file mode 100644 index 000000000000..ecf9ed1de185 --- /dev/null +++ b/net/dsa/netlink.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 NXP + */ +#include <linux/netdevice.h> +#include <net/rtnetlink.h> + +#include "dsa_priv.h" + +static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { + [IFLA_DSA_MASTER] = { .type = NLA_U32 }, +}; + +static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + int err; + + if (!data) + return 0; + + if (data[IFLA_DSA_MASTER]) { + u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]); + struct net_device *master; + + master = __dev_get_by_index(dev_net(dev), ifindex); + if (!master) + return -EINVAL; + + err = dsa_slave_change_master(dev, master, extack); + if (err) + return err; + } + + return 0; +} + +static size_t dsa_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */ + 0; +} + +static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct net_device *master = dsa_slave_to_master(dev); + + if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex)) + return -EMSGSIZE; + + return 0; +} + +struct rtnl_link_ops dsa_link_ops __read_mostly = { + .kind = "dsa", + .priv_size = sizeof(struct dsa_port), + .maxtype = IFLA_DSA_MAX, + .policy = dsa_policy, + .changelink = dsa_changelink, + .get_size = dsa_get_size, + .fill_info = dsa_fill_info, + .netns_refund = true, +}; diff --git a/net/dsa/port.c b/net/dsa/port.c index ec13dc666788..208168276995 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -7,38 +7,127 @@ */ #include <linux/if_bridge.h> +#include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include "dsa_priv.h" +/** + * dsa_port_notify - Notify the switching fabric of changes to a port + * @dp: port on which change occurred + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Notify all switches in the DSA tree that this port's switch belongs to, + * including this switch itself, of an event. Allows the other switches to + * reconfigure themselves for cross-chip operations. Can also be used to + * reconfigure ports without net_devices (CPU ports, DSA links) whenever + * a user port's state changes. + */ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) { - struct raw_notifier_head *nh = &dp->ds->dst->nh; + return dsa_tree_notify(dp->ds->dst, e, v); +} + +static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp, u16 vid) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + struct switchdev_notifier_fdb_info info = { + .vid = vid, + }; + + /* When the port becomes standalone it has already left the bridge. + * Don't notify the bridge in that case. + */ + if (!brport_dev) + return; + + call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE, + brport_dev, &info.info, NULL); +} + +static void dsa_port_fast_age(const struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_fast_age) + return; + + ds->ops->port_fast_age(ds, dp->index); + + /* flush all VLANs */ + dsa_port_notify_bridge_fdb_flush(dp, 0); +} + +static int dsa_port_vlan_fast_age(const struct dsa_port *dp, u16 vid) +{ + struct dsa_switch *ds = dp->ds; int err; - err = raw_notifier_call_chain(nh, e, v); + if (!ds->ops->port_vlan_fast_age) + return -EOPNOTSUPP; + + err = ds->ops->port_vlan_fast_age(ds, dp->index, vid); + + if (!err) + dsa_port_notify_bridge_fdb_flush(dp, vid); + + return err; +} + +static int dsa_port_msti_fast_age(const struct dsa_port *dp, u16 msti) +{ + DECLARE_BITMAP(vids, VLAN_N_VID) = { 0 }; + int err, vid; + + err = br_mst_get_info(dsa_port_bridge_dev_get(dp), msti, vids); + if (err) + return err; + + for_each_set_bit(vid, vids, VLAN_N_VID) { + err = dsa_port_vlan_fast_age(dp, vid); + if (err) + return err; + } - return notifier_to_errno(err); + return 0; } -int dsa_port_set_state(struct dsa_port *dp, u8 state, - struct switchdev_trans *trans) +static bool dsa_port_can_configure_learning(struct dsa_port *dp) +{ + struct switchdev_brport_flags flags = { + .mask = BR_LEARNING, + }; + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags) + return false; + + err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL); + return !err; +} + +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; int port = dp->index; - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; + if (!ds->ops->port_stp_state_set) + return -EOPNOTSUPP; - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, port, state); + ds->ops->port_stp_state_set(ds, port, state); - if (ds->ops->port_fast_age) { + if (!dsa_port_can_configure_learning(dp) || + (do_fast_age && dp->learning)) { /* Fast age FDB entries or flush appropriate forwarding database * for the given port, if we are moving it from Learning or * Forwarding state, to Disabled or Blocking or Listening state. + * Ports that were standalone before the STP state change don't + * need to fast age the FDB, since address learning is off in + * standalone mode. */ if ((dp->stp_state == BR_STATE_LEARNING || @@ -46,7 +135,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, (state == BR_STATE_DISABLED || state == BR_STATE_BLOCKING || state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); + dsa_port_fast_age(dp); } dp->stp_state = state; @@ -54,13 +143,53 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, return 0; } -static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, + bool do_fast_age) +{ + struct dsa_switch *ds = dp->ds; + int err; + + err = dsa_port_set_state(dp, state, do_fast_age); + if (err && err != -EOPNOTSUPP) { + dev_err(ds->dev, "port %d failed to set STP state %u: %pe\n", + dp->index, state, ERR_PTR(err)); + } +} + +int dsa_port_set_mst_state(struct dsa_port *dp, + const struct switchdev_mst_state *state, + struct netlink_ext_ack *extack) { + struct dsa_switch *ds = dp->ds; + u8 prev_state; int err; - err = dsa_port_set_state(dp, state, NULL); + if (!ds->ops->port_mst_state_set) + return -EOPNOTSUPP; + + err = br_mst_get_state(dsa_port_to_bridge_port(dp), state->msti, + &prev_state); if (err) - pr_err("DSA: failed to set STP state %u (%d)\n", state, err); + return err; + + err = ds->ops->port_mst_state_set(ds, dp->index, state); + if (err) + return err; + + if (!(dp->learning && + (prev_state == BR_STATE_LEARNING || + prev_state == BR_STATE_FORWARDING) && + (state->state == BR_STATE_DISABLED || + state->state == BR_STATE_BLOCKING || + state->state == BR_STATE_LISTENING))) + return 0; + + err = dsa_port_msti_fast_age(dp, state->msti); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "Unable to flush associated VLANs"); + + return 0; } int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) @@ -75,8 +204,8 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) return err; } - if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + if (!dp->bridge) + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false); if (dp->pl) phylink_start(dp->pl); @@ -103,8 +232,8 @@ void dsa_port_disable_rt(struct dsa_port *dp) if (dp->pl) phylink_stop(dp->pl); - if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_DISABLED); + if (!dp->bridge) + dsa_port_set_state_now(dp, BR_STATE_DISABLED, false); if (ds->ops->port_disable) ds->ops->port_disable(ds, port); @@ -117,68 +246,503 @@ void dsa_port_disable(struct dsa_port *dp) rtnl_unlock(); } -int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) +static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, + struct dsa_bridge bridge) { - struct dsa_notifier_bridge_info info = { - .sw_index = dp->ds->index, - .port = dp->index, - .br = br, - }; + struct netlink_ext_ack extack = {0}; + bool change_vlan_filtering = false; + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; + bool vlan_filtering; int err; - /* Set the flooding mode before joining the port in the switch */ - err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL); + if (ds->needs_standalone_vlan_filtering && + !br_vlan_enabled(bridge.dev)) { + change_vlan_filtering = true; + vlan_filtering = true; + } else if (!ds->needs_standalone_vlan_filtering && + br_vlan_enabled(bridge.dev)) { + change_vlan_filtering = true; + vlan_filtering = false; + } + + /* If the bridge was vlan_filtering, the bridge core doesn't trigger an + * event for changing vlan_filtering setting upon slave ports leaving + * it. That is a good thing, because that lets us handle it and also + * handle the case where the switch's vlan_filtering setting is global + * (not per port). When that happens, the correct moment to trigger the + * vlan_filtering callback is only when the last port leaves the last + * VLAN-aware bridge. + */ + if (change_vlan_filtering && ds->vlan_filtering_is_global) { + dsa_switch_for_each_port(other_dp, ds) { + struct net_device *br = dsa_port_bridge_dev_get(other_dp); + + if (br && br_vlan_enabled(br)) { + change_vlan_filtering = false; + break; + } + } + } + + if (!change_vlan_filtering) + return; + + err = dsa_port_vlan_filtering(dp, vlan_filtering, &extack); + if (extack._msg) { + dev_err(ds->dev, "port %d: %s\n", dp->index, + extack._msg); + } + if (err && err != -EOPNOTSUPP) { + dev_err(ds->dev, + "port %d failed to reset VLAN filtering to %d: %pe\n", + dp->index, vlan_filtering, ERR_PTR(err)); + } +} + +static int dsa_port_inherit_brport_flags(struct dsa_port *dp, + struct netlink_ext_ack *extack) +{ + const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | + BR_BCAST_FLOOD | BR_PORT_LOCKED; + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + int flag, err; + + for_each_set_bit(flag, &mask, 32) { + struct switchdev_brport_flags flags = {0}; + + flags.mask = BIT(flag); + + if (br_port_flag_is_set(brport_dev, BIT(flag))) + flags.val = BIT(flag); + + err = dsa_port_bridge_flags(dp, flags, extack); + if (err && err != -EOPNOTSUPP) + return err; + } + + return 0; +} + +static void dsa_port_clear_brport_flags(struct dsa_port *dp) +{ + const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; + const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | + BR_BCAST_FLOOD | BR_PORT_LOCKED; + int flag, err; + + for_each_set_bit(flag, &mask, 32) { + struct switchdev_brport_flags flags = {0}; + + flags.mask = BIT(flag); + flags.val = val & BIT(flag); + + err = dsa_port_bridge_flags(dp, flags, NULL); + if (err && err != -EOPNOTSUPP) + dev_err(dp->ds->dev, + "failed to clear bridge port flag %lu: %pe\n", + flags.val, ERR_PTR(err)); + } +} + +static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, + struct netlink_ext_ack *extack) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + struct net_device *br = dsa_port_bridge_dev_get(dp); + int err; + + err = dsa_port_inherit_brport_flags(dp, extack); if (err) return err; + err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false); + if (err && err != -EOPNOTSUPP) + return err; + + err = dsa_port_vlan_filtering(dp, br_vlan_enabled(br), extack); + if (err && err != -EOPNOTSUPP) + return err; + + err = dsa_port_ageing_time(dp, br_get_ageing_time(br)); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp, + struct dsa_bridge bridge) +{ + /* Configure the port for standalone mode (no address learning, + * flood everything). + * The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events + * when the user requests it through netlink or sysfs, but not + * automatically at port join or leave, so we need to handle resetting + * the brport flags ourselves. But we even prefer it that way, because + * otherwise, some setups might never get the notification they need, + * for example, when a port leaves a LAG that offloads the bridge, + * it becomes standalone, but as far as the bridge is concerned, no + * port ever left. + */ + dsa_port_clear_brport_flags(dp); + + /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, + * so allow it to be in BR_STATE_FORWARDING to be kept functional + */ + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true); + + dsa_port_reset_vlan_filtering(dp, bridge); + + /* Ageing time may be global to the switch chip, so don't change it + * here because we have no good reason (or value) to change it to. + */ +} + +static int dsa_port_bridge_create(struct dsa_port *dp, + struct net_device *br, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_bridge *bridge; + + bridge = dsa_tree_bridge_find(ds->dst, br); + if (bridge) { + refcount_inc(&bridge->refcount); + dp->bridge = bridge; + return 0; + } + + bridge = kzalloc(sizeof(*bridge), GFP_KERNEL); + if (!bridge) + return -ENOMEM; + + refcount_set(&bridge->refcount, 1); + + bridge->dev = br; + + bridge->num = dsa_bridge_num_get(br, ds->max_num_bridges); + if (ds->max_num_bridges && !bridge->num) { + NL_SET_ERR_MSG_MOD(extack, + "Range of offloadable bridges exceeded"); + kfree(bridge); + return -EOPNOTSUPP; + } + + dp->bridge = bridge; + + return 0; +} + +static void dsa_port_bridge_destroy(struct dsa_port *dp, + const struct net_device *br) +{ + struct dsa_bridge *bridge = dp->bridge; + + dp->bridge = NULL; + + if (!refcount_dec_and_test(&bridge->refcount)) + return; + + if (bridge->num) + dsa_bridge_num_put(br, bridge->num); + + kfree(bridge); +} + +static bool dsa_port_supports_mst(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + return ds->ops->vlan_msti_set && + ds->ops->port_mst_state_set && + ds->ops->port_vlan_fast_age && + dsa_port_can_configure_learning(dp); +} + +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, + struct netlink_ext_ack *extack) +{ + struct dsa_notifier_bridge_info info = { + .dp = dp, + .extack = extack, + }; + struct net_device *dev = dp->slave; + struct net_device *brport_dev; + int err; + + if (br_mst_enabled(br) && !dsa_port_supports_mst(dp)) + return -EOPNOTSUPP; + /* Here the interface is already bridged. Reflect the current * configuration so that drivers can program their chips accordingly. */ - dp->bridge_dev = br; + err = dsa_port_bridge_create(dp, br, extack); + if (err) + return err; - err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info); + brport_dev = dsa_port_to_bridge_port(dp); - /* The bridging is rolled back on error */ - if (err) { - dsa_port_bridge_flags(dp, 0, NULL); - dp->bridge_dev = NULL; - } + info.bridge = *dp->bridge; + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info); + if (err) + goto out_rollback; + + /* Drivers which support bridge TX forwarding should set this */ + dp->bridge->tx_fwd_offload = info.tx_fwd_offload; + + err = switchdev_bridge_port_offload(brport_dev, dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier, + dp->bridge->tx_fwd_offload, extack); + if (err) + goto out_rollback_unbridge; + + err = dsa_port_switchdev_sync_attrs(dp, extack); + if (err) + goto out_rollback_unoffload; + + return 0; +out_rollback_unoffload: + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); + dsa_flush_workqueue(); +out_rollback_unbridge: + dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); +out_rollback: + dsa_port_bridge_destroy(dp, br); return err; } +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + + /* Don't try to unoffload something that is not offloaded */ + if (!brport_dev) + return; + + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); + + dsa_flush_workqueue(); +} + void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { - .sw_index = dp->ds->index, - .port = dp->index, - .br = br, + .dp = dp, }; int err; + /* If the port could not be offloaded to begin with, then + * there is nothing to do. + */ + if (!dp->bridge) + return; + + info.bridge = *dp->bridge; + /* Here the port is already unbridged. Reflect the current configuration * so that drivers can program their chips accordingly. */ - dp->bridge_dev = NULL; + dsa_port_bridge_destroy(dp, br); - err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); if (err) - pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); + dev_err(dp->ds->dev, + "port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n", + dp->index, ERR_PTR(err)); + + dsa_port_switchdev_unsync_attrs(dp, info.bridge); +} - /* Port is leaving the bridge, disable flooding */ - dsa_port_bridge_flags(dp, 0, NULL); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo) +{ + struct dsa_notifier_lag_info info = { + .dp = dp, + }; + bool tx_enabled; - /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, - * so allow it to be in BR_STATE_FORWARDING to be kept functional + if (!dp->lag) + return 0; + + /* On statically configured aggregates (e.g. loadbalance + * without LACP) ports will always be tx_enabled, even if the + * link is down. Thus we require both link_up and tx_enabled + * in order to include it in the tx set. */ - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + tx_enabled = linfo->link_up && linfo->tx_enabled; + + if (tx_enabled == dp->lag_tx_enabled) + return 0; + + dp->lag_tx_enabled = tx_enabled; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info); } +static int dsa_port_lag_create(struct dsa_port *dp, + struct net_device *lag_dev) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_lag *lag; + + lag = dsa_tree_lag_find(ds->dst, lag_dev); + if (lag) { + refcount_inc(&lag->refcount); + dp->lag = lag; + return 0; + } + + lag = kzalloc(sizeof(*lag), GFP_KERNEL); + if (!lag) + return -ENOMEM; + + refcount_set(&lag->refcount, 1); + mutex_init(&lag->fdb_lock); + INIT_LIST_HEAD(&lag->fdbs); + lag->dev = lag_dev; + dsa_lag_map(ds->dst, lag); + dp->lag = lag; + + return 0; +} + +static void dsa_port_lag_destroy(struct dsa_port *dp) +{ + struct dsa_lag *lag = dp->lag; + + dp->lag = NULL; + dp->lag_tx_enabled = false; + + if (!refcount_dec_and_test(&lag->refcount)) + return; + + WARN_ON(!list_empty(&lag->fdbs)); + dsa_lag_unmap(dp->ds->dst, lag); + kfree(lag); +} + +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + struct dsa_notifier_lag_info info = { + .dp = dp, + .info = uinfo, + .extack = extack, + }; + struct net_device *bridge_dev; + int err; + + err = dsa_port_lag_create(dp, lag_dev); + if (err) + goto err_lag_create; + + info.lag = *dp->lag; + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info); + if (err) + goto err_lag_join; + + bridge_dev = netdev_master_upper_dev_get(lag_dev); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return 0; + + err = dsa_port_bridge_join(dp, bridge_dev, extack); + if (err) + goto err_bridge_join; + + return 0; + +err_bridge_join: + dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); +err_lag_join: + dsa_port_lag_destroy(dp); +err_lag_create: + return err; +} + +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev) +{ + struct net_device *br = dsa_port_bridge_dev_get(dp); + + if (br) + dsa_port_pre_bridge_leave(dp, br); +} + +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev) +{ + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct dsa_notifier_lag_info info = { + .dp = dp, + }; + int err; + + if (!dp->lag) + return; + + /* Port might have been part of a LAG that in turn was + * attached to a bridge. + */ + if (br) + dsa_port_bridge_leave(dp, br); + + info.lag = *dp->lag; + + dsa_port_lag_destroy(dp); + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); + if (err) + dev_err(dp->ds->dev, + "port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n", + dp->index, ERR_PTR(err)); +} + +/* Must be called under rcu_read_lock() */ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, - bool vlan_filtering) + bool vlan_filtering, + struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; - int i; + struct dsa_port *other_dp; + int err; + + /* VLAN awareness was off, so the question is "can we turn it on". + * We may have had 8021q uppers, those need to go. Make sure we don't + * enter an inconsistent state: deny changing the VLAN awareness state + * as long as we have 8021q uppers. + */ + if (vlan_filtering && dsa_port_is_user(dp)) { + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct net_device *upper_dev, *slave = dp->slave; + struct list_head *iter; + + netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) { + struct bridge_vlan_info br_info; + u16 vid; + + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the + * device, respectively the VID is not found, returning + * 0 means success, which is a failure for us here. + */ + err = br_vlan_get_info(br, vid, &br_info); + if (err == 0) { + NL_SET_ERR_MSG_MOD(extack, + "Must first remove VLAN uppers having VIDs also present in bridge"); + return false; + } + } + } if (!ds->vlan_filtering_is_global) return true; @@ -188,19 +752,18 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, * different ports of the same switch device and one of them has a * different setting than what is being requested. */ - for (i = 0; i < ds->num_ports; i++) { - struct net_device *other_bridge; + dsa_switch_for_each_port(other_dp, ds) { + struct net_device *other_br = dsa_port_bridge_dev_get(other_dp); - other_bridge = dsa_to_port(ds, i)->bridge_dev; - if (!other_bridge) - continue; /* If it's the same bridge, it also has same * vlan_filtering setting => no need to check */ - if (other_bridge == dp->bridge_dev) + if (!other_br || other_br == dsa_port_bridge_dev_get(dp)) continue; - if (br_vlan_enabled(other_bridge) != vlan_filtering) { - dev_err(ds->dev, "VLAN filtering is a global setting\n"); + + if (br_vlan_enabled(other_br) != vlan_filtering) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN filtering is a global setting"); return false; } } @@ -208,105 +771,210 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, } int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans) + struct netlink_ext_ack *extack) { + bool old_vlan_filtering = dsa_port_is_vlan_filtering(dp); struct dsa_switch *ds = dp->ds; + bool apply; int err; - /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (!ds->ops->port_vlan_filtering) - return 0; + return -EOPNOTSUPP; - if (!dsa_port_can_apply_vlan_filtering(dp, vlan_filtering)) + /* We are called from dsa_slave_switchdev_blocking_event(), + * which is not under rcu_read_lock(), unlike + * dsa_slave_switchdev_event(). + */ + rcu_read_lock(); + apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering, extack); + rcu_read_unlock(); + if (!apply) return -EINVAL; if (dsa_port_is_vlan_filtering(dp) == vlan_filtering) return 0; - err = ds->ops->port_vlan_filtering(ds, dp->index, - vlan_filtering); + err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering, + extack); if (err) return err; - if (ds->vlan_filtering_is_global) + if (ds->vlan_filtering_is_global) { + struct dsa_port *other_dp; + ds->vlan_filtering = vlan_filtering; - else + + dsa_switch_for_each_user_port(other_dp, ds) { + struct net_device *slave = other_dp->slave; + + /* We might be called in the unbind path, so not + * all slave devices might still be registered. + */ + if (!slave) + continue; + + err = dsa_slave_manage_vlan_filtering(slave, + vlan_filtering); + if (err) + goto restore; + } + } else { dp->vlan_filtering = vlan_filtering; + + err = dsa_slave_manage_vlan_filtering(dp->slave, + vlan_filtering); + if (err) + goto restore; + } + return 0; + +restore: + ds->ops->port_vlan_filtering(ds, dp->index, old_vlan_filtering, NULL); + + if (ds->vlan_filtering_is_global) + ds->vlan_filtering = old_vlan_filtering; + else + dp->vlan_filtering = old_vlan_filtering; + + return err; } -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans) +/* This enforces legacy behavior for switch drivers which assume they can't + * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0 + */ +bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) +{ + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct dsa_switch *ds = dp->ds; + + if (!br) + return false; + + return !ds->configure_vlan_while_not_filtering && !br_vlan_enabled(br); +} + +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock) { unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock); unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - struct dsa_notifier_ageing_time_info info = { - .ageing_time = ageing_time, - .trans = trans, - }; + struct dsa_notifier_ageing_time_info info; + int err; + + info.ageing_time = ageing_time; - if (switchdev_trans_ph_prepare(trans)) - return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + err = dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + if (err) + return err; dp->ageing_time = ageing_time; - return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + return 0; +} + +int dsa_port_mst_enable(struct dsa_port *dp, bool on, + struct netlink_ext_ack *extack) +{ + if (on && !dsa_port_supports_mst(dp)) { + NL_SET_ERR_MSG_MOD(extack, "Hardware does not support MST"); + return -EINVAL; + } + + return 0; } -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans) +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; - if (!ds->ops->port_egress_floods || - (flags & ~(BR_FLOOD | BR_MCAST_FLOOD))) + if (!ds->ops->port_pre_bridge_flags) return -EINVAL; - return 0; + return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack); } -int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans) +int dsa_port_bridge_flags(struct dsa_port *dp, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; - int port = dp->index; - int err = 0; + int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; + if (!ds->ops->port_bridge_flags) + return -EOPNOTSUPP; - if (ds->ops->port_egress_floods) - err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD, - flags & BR_MCAST_FLOOD); + err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack); + if (err) + return err; - return err; + if (flags.mask & BR_LEARNING) { + bool learning = flags.val & BR_LEARNING; + + if (learning == dp->learning) + return 0; + + if ((dp->learning && !learning) && + (dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING)) + dsa_port_fast_age(dp); + + dp->learning = learning; + } + + return 0; } -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct switchdev_trans *trans) +void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc) { struct dsa_switch *ds = dp->ds; - int port = dp->index; - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_egress_floods ? 0 : -EOPNOTSUPP; + if (ds->ops->port_set_host_flood) + ds->ops->port_set_host_flood(ds, dp->index, uc, mc); +} - return ds->ops->port_egress_floods(ds, port, true, mrouter); +int dsa_port_vlan_msti(struct dsa_port *dp, + const struct switchdev_vlan_msti *msti) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->vlan_msti_set) + return -EOPNOTSUPP; + + return ds->ops->vlan_msti_set(ds, *dp->bridge, msti); +} + +int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu) +{ + struct dsa_notifier_mtu_info info = { + .dp = dp, + .mtu = new_mtu, + }; + + return dsa_port_notify(dp, DSA_NOTIFIER_MTU, &info); } int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { struct dsa_notifier_fdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .addr = addr, .vid = vid, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + /* Refcounting takes bridge.num as a key, and should be global for all + * bridges in the absence of FDB isolation, and per bridge otherwise. + * Force the bridge.num to zero here in the absence of FDB isolation. + */ + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info); } @@ -314,16 +982,157 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid) { struct dsa_notifier_fdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .addr = addr, .vid = vid, - + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); } +static int dsa_port_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_notifier_fdb_info info = { + .dp = dp, + .addr = addr, + .vid = vid, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info); +} + +int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, + }; + + return dsa_port_host_fdb_add(dp, addr, vid, db); +} + +int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ + struct net_device *master = dsa_port_to_master(dp); + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; + int err; + + /* Avoid a call to __dev_set_promiscuity() on the master, which + * requires rtnl_lock(), since we can't guarantee that is held here, + * and we can't take it either. + */ + if (master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(master, addr); + if (err) + return err; + } + + return dsa_port_host_fdb_add(dp, addr, vid, db); +} + +static int dsa_port_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_notifier_fdb_info info = { + .dp = dp, + .addr = addr, + .vid = vid, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info); +} + +int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, + }; + + return dsa_port_host_fdb_del(dp, addr, vid, db); +} + +int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid) +{ + struct net_device *master = dsa_port_to_master(dp); + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; + int err; + + if (master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(master, addr); + if (err) + return err; + } + + return dsa_port_host_fdb_del(dp, addr, vid, db); +} + +int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid) +{ + struct dsa_notifier_lag_fdb_info info = { + .lag = dp->lag, + .addr = addr, + .vid = vid, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_ADD, &info); +} + +int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid) +{ + struct dsa_notifier_lag_fdb_info info = { + .lag = dp->lag, + .addr = addr, + .vid = vid, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_DEL, &info); +} + int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) { struct dsa_switch *ds = dp->ds; @@ -336,16 +1145,20 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) } int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, - .trans = trans, + .dp = dp, .mdb = mdb, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); } @@ -353,23 +1166,116 @@ int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .mdb = mdb, + .db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }, }; + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info); } +static int dsa_port_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct dsa_notifier_mdb_info info = { + .dp = dp, + .mdb = mdb, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info); +} + +int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, + }; + + return dsa_port_host_mdb_add(dp, mdb, db); +} + +int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct net_device *master = dsa_port_to_master(dp); + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; + int err; + + err = dev_mc_add(master, mdb->addr); + if (err) + return err; + + return dsa_port_host_mdb_add(dp, mdb, db); +} + +static int dsa_port_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct dsa_notifier_mdb_info info = { + .dp = dp, + .mdb = mdb, + .db = db, + }; + + if (!dp->ds->fdb_isolation) + info.db.bridge.num = 0; + + return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info); +} + +int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_db db = { + .type = DSA_DB_PORT, + .dp = dp, + }; + + return dsa_port_host_mdb_del(dp, mdb, db); +} + +int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) +{ + struct net_device *master = dsa_port_to_master(dp); + struct dsa_db db = { + .type = DSA_DB_BRIDGE, + .bridge = *dp->bridge, + }; + int err; + + err = dev_mc_del(master, mdb->addr); + if (err) + return err; + + return dsa_port_host_mdb_del(dp, mdb, db); +} + int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + struct netlink_ext_ack *extack) { struct dsa_notifier_vlan_info info = { - .sw_index = dp->ds->index, - .port = dp->index, - .trans = trans, + .dp = dp, .vlan = vlan, + .extack = extack, }; return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); @@ -379,46 +1285,233 @@ int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { struct dsa_notifier_vlan_info info = { - .sw_index = dp->ds->index, - .port = dp->index, + .dp = dp, .vlan = vlan, }; return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } -int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) +int dsa_port_host_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) { - struct switchdev_obj_port_vlan vlan = { - .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .flags = flags, - .vid_begin = vid, - .vid_end = vid, + struct net_device *master = dsa_port_to_master(dp); + struct dsa_notifier_vlan_info info = { + .dp = dp, + .vlan = vlan, + .extack = extack, }; - struct switchdev_trans trans; int err; - trans.ph_prepare = true; - err = dsa_port_vlan_add(dp, &vlan, &trans); - if (err) + err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_ADD, &info); + if (err && err != -EOPNOTSUPP) return err; - trans.ph_prepare = false; - return dsa_port_vlan_add(dp, &vlan, &trans); + vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid); + + return err; } -EXPORT_SYMBOL(dsa_port_vid_add); -int dsa_port_vid_del(struct dsa_port *dp, u16 vid) +int dsa_port_host_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan) { - struct switchdev_obj_port_vlan vlan = { - .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .vid_begin = vid, - .vid_end = vid, + struct net_device *master = dsa_port_to_master(dp); + struct dsa_notifier_vlan_info info = { + .dp = dp, + .vlan = vlan, }; + int err; + + err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_DEL, &info); + if (err && err != -EOPNOTSUPP) + return err; + + vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); + + return err; +} + +int dsa_port_mrp_add(const struct dsa_port *dp, + const struct switchdev_obj_mrp *mrp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_mrp_add) + return -EOPNOTSUPP; + + return ds->ops->port_mrp_add(ds, dp->index, mrp); +} + +int dsa_port_mrp_del(const struct dsa_port *dp, + const struct switchdev_obj_mrp *mrp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_mrp_del) + return -EOPNOTSUPP; + + return ds->ops->port_mrp_del(ds, dp->index, mrp); +} + +int dsa_port_mrp_add_ring_role(const struct dsa_port *dp, + const struct switchdev_obj_ring_role_mrp *mrp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_mrp_add_ring_role) + return -EOPNOTSUPP; - return dsa_port_vlan_del(dp, &vlan); + return ds->ops->port_mrp_add_ring_role(ds, dp->index, mrp); +} + +int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, + const struct switchdev_obj_ring_role_mrp *mrp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_mrp_del_ring_role) + return -EOPNOTSUPP; + + return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp); +} + +static int dsa_port_assign_master(struct dsa_port *dp, + struct net_device *master, + struct netlink_ext_ack *extack, + bool fail_on_err) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index, err; + + err = ds->ops->port_change_master(ds, port, master, extack); + if (err && !fail_on_err) + dev_err(ds->dev, "port %d failed to assign master %s: %pe\n", + port, master->name, ERR_PTR(err)); + + if (err && fail_on_err) + return err; + + dp->cpu_dp = master->dsa_ptr; + dp->cpu_port_in_lag = netif_is_lag_master(master); + + return 0; +} + +/* Change the dp->cpu_dp affinity for a user port. Note that both cross-chip + * notifiers and drivers have implicit assumptions about user-to-CPU-port + * mappings, so we unfortunately cannot delay the deletion of the objects + * (switchdev, standalone addresses, standalone VLANs) on the old CPU port + * until the new CPU port has been set up. So we need to completely tear down + * the old CPU port before changing it, and restore it on errors during the + * bringup of the new one. + */ +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp); + struct net_device *old_master = dsa_port_to_master(dp); + struct net_device *dev = dp->slave; + struct dsa_switch *ds = dp->ds; + bool vlan_filtering; + int err, tmp; + + /* Bridges may hold host FDB, MDB and VLAN objects. These need to be + * migrated, so dynamically unoffload and later reoffload the bridge + * port. + */ + if (bridge_dev) { + dsa_port_pre_bridge_leave(dp, bridge_dev); + dsa_port_bridge_leave(dp, bridge_dev); + } + + /* The port might still be VLAN filtering even if it's no longer + * under a bridge, either due to ds->vlan_filtering_is_global or + * ds->needs_standalone_vlan_filtering. In turn this means VLANs + * on the CPU port. + */ + vlan_filtering = dsa_port_is_vlan_filtering(dp); + if (vlan_filtering) { + err = dsa_slave_manage_vlan_filtering(dev, false); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to remove standalone VLANs"); + goto rewind_old_bridge; + } + } + + /* Standalone addresses, and addresses of upper interfaces like + * VLAN, LAG, HSR need to be migrated. + */ + dsa_slave_unsync_ha(dev); + + err = dsa_port_assign_master(dp, master, extack, true); + if (err) + goto rewind_old_addrs; + + dsa_slave_sync_ha(dev); + + if (vlan_filtering) { + err = dsa_slave_manage_vlan_filtering(dev, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to restore standalone VLANs"); + goto rewind_new_addrs; + } + } + + if (bridge_dev) { + err = dsa_port_bridge_join(dp, bridge_dev, extack); + if (err && err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to reoffload bridge"); + goto rewind_new_vlan; + } + } + + return 0; + +rewind_new_vlan: + if (vlan_filtering) + dsa_slave_manage_vlan_filtering(dev, false); + +rewind_new_addrs: + dsa_slave_unsync_ha(dev); + + dsa_port_assign_master(dp, old_master, NULL, false); + +/* Restore the objects on the old CPU port */ +rewind_old_addrs: + dsa_slave_sync_ha(dev); + + if (vlan_filtering) { + tmp = dsa_slave_manage_vlan_filtering(dev, true); + if (tmp) { + dev_err(ds->dev, + "port %d failed to restore standalone VLANs: %pe\n", + dp->index, ERR_PTR(tmp)); + } + } + +rewind_old_bridge: + if (bridge_dev) { + tmp = dsa_port_bridge_join(dp, bridge_dev, extack); + if (tmp) { + dev_err(ds->dev, + "port %d failed to rejoin bridge %s: %pe\n", + dp->index, bridge_dev->name, ERR_PTR(tmp)); + } + } + + return err; +} + +void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, + const struct dsa_device_ops *tag_ops) +{ + cpu_dp->rcv = tag_ops->rcv; + cpu_dp->tag_ops = tag_ops; } -EXPORT_SYMBOL(dsa_port_vid_del); static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) { @@ -446,8 +1539,11 @@ static void dsa_port_phylink_validate(struct phylink_config *config, struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; - if (!ds->ops->phylink_validate) + if (!ds->ops->phylink_validate) { + if (config->mac_capabilities) + phylink_generic_validate(config, supported, state); return; + } ds->ops->phylink_validate(ds, dp->index, supported, state); } @@ -457,6 +1553,7 @@ static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; + int err; /* Only called for inband modes */ if (!ds->ops->phylink_mac_link_state) { @@ -464,8 +1561,26 @@ static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, return; } - if (ds->ops->phylink_mac_link_state(ds, dp->index, state) < 0) + err = ds->ops->phylink_mac_link_state(ds, dp->index, state); + if (err < 0) { + dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n", + dp->index, err); state->link = 0; + } +} + +static struct phylink_pcs * +dsa_port_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->phylink_mac_select_pcs) + pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface); + + return pcs; } static void dsa_port_phylink_mac_config(struct phylink_config *config, @@ -500,7 +1615,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, struct phy_device *phydev = NULL; struct dsa_switch *ds = dp->ds; - if (dsa_is_user_port(ds, dp->index)) + if (dsa_port_is_user(dp)) phydev = dp->slave->phydev; if (!ds->ops->phylink_mac_link_down) { @@ -513,9 +1628,11 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, } static void dsa_port_phylink_mac_link_up(struct phylink_config *config, + struct phy_device *phydev, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -526,11 +1643,13 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, return; } - ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev); + ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev, + speed, duplex, tx_pause, rx_pause); } -const struct phylink_mac_ops dsa_port_phylink_mac_ops = { +static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .validate = dsa_port_phylink_validate, + .mac_select_pcs = dsa_port_phylink_mac_select_pcs, .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state, .mac_config = dsa_port_phylink_mac_config, .mac_an_restart = dsa_port_phylink_mac_an_restart, @@ -538,7 +1657,46 @@ const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .mac_link_up = dsa_port_phylink_mac_link_up, }; -static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) +int dsa_port_phylink_create(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + phy_interface_t mode; + struct phylink *pl; + int err; + + err = of_get_phy_mode(dp->dn, &mode); + if (err) + mode = PHY_INTERFACE_MODE_NA; + + /* Presence of phylink_mac_link_state or phylink_mac_an_restart is + * an indicator of a legacy phylink driver. + */ + if (ds->ops->phylink_mac_link_state || + ds->ops->phylink_mac_an_restart) + dp->pl_config.legacy_pre_march2020 = true; + + if (ds->ops->phylink_get_caps) + ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config); + + pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn), + mode, &dsa_port_phylink_mac_ops); + if (IS_ERR(pl)) { + pr_err("error creating PHYLINK: %ld\n", PTR_ERR(pl)); + return PTR_ERR(pl); + } + + dp->pl = pl; + + return 0; +} + +void dsa_port_phylink_destroy(struct dsa_port *dp) +{ + phylink_destroy(dp->pl); + dp->pl = NULL; +} + +static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable) { struct dsa_switch *ds = dp->ds; struct phy_device *phydev; @@ -576,7 +1734,7 @@ err_put_dev: return err; } -static int dsa_port_fixed_link_register_of(struct dsa_port *dp) +static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp) { struct device_node *dn = dp->dn; struct dsa_switch *ds = dp->ds; @@ -610,27 +1768,18 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp) return 0; } -static int dsa_port_phylink_register(struct dsa_port *dp) +static int dsa_shared_port_phylink_register(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; struct device_node *port_dn = dp->dn; - phy_interface_t mode; int err; - err = of_get_phy_mode(port_dn, &mode); - if (err) - mode = PHY_INTERFACE_MODE_NA; - dp->pl_config.dev = ds->dev; dp->pl_config.type = PHYLINK_DEV; - dp->pl_config.pcs_poll = ds->pcs_poll; - dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), - mode, &dsa_port_phylink_mac_ops); - if (IS_ERR(dp->pl)) { - pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl)); - return PTR_ERR(dp->pl); - } + err = dsa_port_phylink_create(dp); + if (err) + return err; err = phylink_of_phy_connect(dp->pl, port_dn, 0); if (err && err != -ENODEV) { @@ -641,19 +1790,188 @@ static int dsa_port_phylink_register(struct dsa_port *dp) return 0; err_phy_connect: - phylink_destroy(dp->pl); + dsa_port_phylink_destroy(dp); return err; } -int dsa_port_link_register_of(struct dsa_port *dp) +/* During the initial DSA driver migration to OF, port nodes were sometimes + * added to device trees with no indication of how they should operate from a + * link management perspective (phy-handle, fixed-link, etc). Additionally, the + * phy-mode may be absent. The interpretation of these port OF nodes depends on + * their type. + * + * User ports with no phy-handle or fixed-link are expected to connect to an + * internal PHY located on the ds->slave_mii_bus at an MDIO address equal to + * the port number. This description is still actively supported. + * + * Shared (CPU and DSA) ports with no phy-handle or fixed-link are expected to + * operate at the maximum speed that their phy-mode is capable of. If the + * phy-mode is absent, they are expected to operate using the phy-mode + * supported by the port that gives the highest link speed. It is unspecified + * if the port should use flow control or not, half duplex or full duplex, or + * if the phy-mode is a SERDES link, whether in-band autoneg is expected to be + * enabled or not. + * + * In the latter case of shared ports, omitting the link management description + * from the firmware node is deprecated and strongly discouraged. DSA uses + * phylink, which rejects the firmware nodes of these ports for lacking + * required properties. + * + * For switches in this table, DSA will skip enforcing validation and will + * later omit registering a phylink instance for the shared ports, if they lack + * a fixed-link, a phy-handle, or a managed = "in-band-status" property. + * It becomes the responsibility of the driver to ensure that these ports + * operate at the maximum speed (whatever this means) and will interoperate + * with the DSA master or other cascade port, since phylink methods will not be + * invoked for them. + * + * If you are considering expanding this table for newly introduced switches, + * think again. It is OK to remove switches from this table if there aren't DT + * blobs in circulation which rely on defaulting the shared ports. + */ +static const char * const dsa_switches_apply_workarounds[] = { +#if IS_ENABLED(CONFIG_NET_DSA_XRS700X) + "arrow,xrs7003e", + "arrow,xrs7003f", + "arrow,xrs7004e", + "arrow,xrs7004f", +#endif +#if IS_ENABLED(CONFIG_B53) + "brcm,bcm5325", + "brcm,bcm53115", + "brcm,bcm53125", + "brcm,bcm53128", + "brcm,bcm5365", + "brcm,bcm5389", + "brcm,bcm5395", + "brcm,bcm5397", + "brcm,bcm5398", + "brcm,bcm53010-srab", + "brcm,bcm53011-srab", + "brcm,bcm53012-srab", + "brcm,bcm53018-srab", + "brcm,bcm53019-srab", + "brcm,bcm5301x-srab", + "brcm,bcm11360-srab", + "brcm,bcm58522-srab", + "brcm,bcm58525-srab", + "brcm,bcm58535-srab", + "brcm,bcm58622-srab", + "brcm,bcm58623-srab", + "brcm,bcm58625-srab", + "brcm,bcm88312-srab", + "brcm,cygnus-srab", + "brcm,nsp-srab", + "brcm,omega-srab", + "brcm,bcm3384-switch", + "brcm,bcm6328-switch", + "brcm,bcm6368-switch", + "brcm,bcm63xx-switch", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_BCM_SF2) + "brcm,bcm7445-switch-v4.0", + "brcm,bcm7278-switch-v4.0", + "brcm,bcm7278-switch-v4.8", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_LANTIQ_GSWIP) + "lantiq,xrx200-gswip", + "lantiq,xrx300-gswip", + "lantiq,xrx330-gswip", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_MV88E6060) + "marvell,mv88e6060", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_MV88E6XXX) + "marvell,mv88e6085", + "marvell,mv88e6190", + "marvell,mv88e6250", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) + "microchip,ksz8765", + "microchip,ksz8794", + "microchip,ksz8795", + "microchip,ksz8863", + "microchip,ksz8873", + "microchip,ksz9477", + "microchip,ksz9897", + "microchip,ksz9893", + "microchip,ksz9563", + "microchip,ksz8563", + "microchip,ksz9567", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) + "smsc,lan9303-mdio", +#endif +#if IS_ENABLED(CONFIG_NET_DSA_SMSC_LAN9303_I2C) + "smsc,lan9303-i2c", +#endif + NULL, +}; + +static void dsa_shared_port_validate_of(struct dsa_port *dp, + bool *missing_phy_mode, + bool *missing_link_description) +{ + struct device_node *dn = dp->dn, *phy_np; + struct dsa_switch *ds = dp->ds; + phy_interface_t mode; + + *missing_phy_mode = false; + *missing_link_description = false; + + if (of_get_phy_mode(dn, &mode)) { + *missing_phy_mode = true; + dev_err(ds->dev, + "OF node %pOF of %s port %d lacks the required \"phy-mode\" property\n", + dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index); + } + + /* Note: of_phy_is_fixed_link() also returns true for + * managed = "in-band-status" + */ + if (of_phy_is_fixed_link(dn)) + return; + + phy_np = of_parse_phandle(dn, "phy-handle", 0); + if (phy_np) { + of_node_put(phy_np); + return; + } + + *missing_link_description = true; + + dev_err(ds->dev, + "OF node %pOF of %s port %d lacks the required \"phy-handle\", \"fixed-link\" or \"managed\" properties\n", + dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index); +} + +int dsa_shared_port_link_register_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; - struct device_node *phy_np; + bool missing_link_description; + bool missing_phy_mode; + int port = dp->index; + + dsa_shared_port_validate_of(dp, &missing_phy_mode, + &missing_link_description); + + if ((missing_phy_mode || missing_link_description) && + !of_device_compatible_match(ds->dev->of_node, + dsa_switches_apply_workarounds)) + return -EINVAL; if (!ds->ops->adjust_link) { - phy_np = of_parse_phandle(dp->dn, "phy-handle", 0); - if (of_phy_is_fixed_link(dp->dn) || phy_np) - return dsa_port_phylink_register(dp); + if (missing_link_description) { + dev_warn(ds->dev, + "Skipping phylink registration for %s port %d\n", + dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index); + } else { + if (ds->ops->phylink_mac_link_down) + ds->ops->phylink_mac_link_down(ds, port, + MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); + + return dsa_shared_port_phylink_register(dp); + } return 0; } @@ -661,12 +1979,12 @@ int dsa_port_link_register_of(struct dsa_port *dp) "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n"); if (of_phy_is_fixed_link(dp->dn)) - return dsa_port_fixed_link_register_of(dp); + return dsa_shared_port_fixed_link_register_of(dp); else - return dsa_port_setup_phy_of(dp, true); + return dsa_shared_port_setup_phy_of(dp, true); } -void dsa_port_link_unregister_of(struct dsa_port *dp) +void dsa_shared_port_link_unregister_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; @@ -674,70 +1992,76 @@ void dsa_port_link_unregister_of(struct dsa_port *dp) rtnl_lock(); phylink_disconnect_phy(dp->pl); rtnl_unlock(); - phylink_destroy(dp->pl); - dp->pl = NULL; + dsa_port_phylink_destroy(dp); return; } if (of_phy_is_fixed_link(dp->dn)) of_phy_deregister_fixed_link(dp->dn); else - dsa_port_setup_phy_of(dp, false); + dsa_shared_port_setup_phy_of(dp, false); } -int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data) +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr) { - struct phy_device *phydev; - int ret = -EOPNOTSUPP; + struct dsa_switch *ds = dp->ds; + int err; - if (of_phy_is_fixed_link(dp->dn)) - return ret; + if (!ds->ops->port_hsr_join) + return -EOPNOTSUPP; - phydev = dsa_port_get_phy_device(dp); - if (IS_ERR_OR_NULL(phydev)) - return ret; + dp->hsr_dev = hsr; - ret = phy_ethtool_get_strings(phydev, data); - put_device(&phydev->mdio.dev); + err = ds->ops->port_hsr_join(ds, dp->index, hsr); + if (err) + dp->hsr_dev = NULL; - return ret; + return err; } -EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings); -int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data) +void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr) { - struct phy_device *phydev; - int ret = -EOPNOTSUPP; - - if (of_phy_is_fixed_link(dp->dn)) - return ret; - - phydev = dsa_port_get_phy_device(dp); - if (IS_ERR_OR_NULL(phydev)) - return ret; + struct dsa_switch *ds = dp->ds; + int err; - ret = phy_ethtool_get_stats(phydev, NULL, data); - put_device(&phydev->mdio.dev); + dp->hsr_dev = NULL; - return ret; + if (ds->ops->port_hsr_leave) { + err = ds->ops->port_hsr_leave(ds, dp->index, hsr); + if (err) + dev_err(dp->ds->dev, + "port %d failed to leave HSR %s: %pe\n", + dp->index, hsr->name, ERR_PTR(err)); + } } -EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats); -int dsa_port_get_phy_sset_count(struct dsa_port *dp) +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast) { - struct phy_device *phydev; - int ret = -EOPNOTSUPP; + struct dsa_notifier_tag_8021q_vlan_info info = { + .dp = dp, + .vid = vid, + }; - if (of_phy_is_fixed_link(dp->dn)) - return ret; + if (broadcast) + return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); - phydev = dsa_port_get_phy_device(dp); - if (IS_ERR_OR_NULL(phydev)) - return ret; + return dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); +} - ret = phy_ethtool_get_sset_count(phydev); - put_device(&phydev->mdio.dev); +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast) +{ + struct dsa_notifier_tag_8021q_vlan_info info = { + .dp = dp, + .vid = vid, + }; + int err; - return ret; + if (broadcast) + err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); + else + err = dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); + if (err) + dev_err(dp->ds->dev, + "port %d failed to notify tag_8021q VLAN %d deletion: %pe\n", + dp->index, vid, ERR_PTR(err)); } -EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ddc0f9236928..a9fde48cffd4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -15,13 +15,197 @@ #include <linux/mdio.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> +#include <net/selftests.h> #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> +#include <linux/if_hsr.h> +#include <net/dcbnl.h> #include <linux/netpoll.h> -#include <linux/ptp_classify.h> #include "dsa_priv.h" +static void dsa_slave_standalone_event_work(struct work_struct *work) +{ + struct dsa_standalone_event_work *standalone_work = + container_of(work, struct dsa_standalone_event_work, work); + const unsigned char *addr = standalone_work->addr; + struct net_device *dev = standalone_work->dev; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_mdb mdb; + struct dsa_switch *ds = dp->ds; + u16 vid = standalone_work->vid; + int err; + + switch (standalone_work->event) { + case DSA_UC_ADD: + err = dsa_port_standalone_host_fdb_add(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to fdb: %d\n", + dp->index, addr, vid, err); + break; + } + break; + + case DSA_UC_DEL: + err = dsa_port_standalone_host_fdb_del(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from fdb: %d\n", + dp->index, addr, vid, err); + } + + break; + case DSA_MC_ADD: + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = dsa_port_standalone_host_mdb_add(dp, &mdb); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to mdb: %d\n", + dp->index, addr, vid, err); + break; + } + break; + case DSA_MC_DEL: + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = dsa_port_standalone_host_mdb_del(dp, &mdb); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from mdb: %d\n", + dp->index, addr, vid, err); + } + + break; + } + + kfree(standalone_work); +} + +static int dsa_slave_schedule_standalone_work(struct net_device *dev, + enum dsa_standalone_event event, + const unsigned char *addr, + u16 vid) +{ + struct dsa_standalone_event_work *standalone_work; + + standalone_work = kzalloc(sizeof(*standalone_work), GFP_ATOMIC); + if (!standalone_work) + return -ENOMEM; + + INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work); + standalone_work->event = event; + standalone_work->dev = dev; + + ether_addr_copy(standalone_work->addr, addr); + standalone_work->vid = vid; + + dsa_schedule_work(&standalone_work->work); + + return 0; +} + +static int dsa_slave_sync_uc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + + dev_uc_add(master, addr); + + if (!dsa_switch_supports_uc_filtering(dp->ds)) + return 0; + + return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); +} + +static int dsa_slave_unsync_uc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + + dev_uc_del(master, addr); + + if (!dsa_switch_supports_uc_filtering(dp->ds)) + return 0; + + return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); +} + +static int dsa_slave_sync_mc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + + dev_mc_add(master, addr); + + if (!dsa_switch_supports_mc_filtering(dp->ds)) + return 0; + + return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); +} + +static int dsa_slave_unsync_mc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + + dev_mc_del(master, addr); + + if (!dsa_switch_supports_mc_filtering(dp->ds)) + return 0; + + return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); +} + +void dsa_slave_sync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_slave_sync_mc(dev, ha->addr); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_slave_sync_uc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + +void dsa_slave_unsync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_slave_unsync_uc(dev, ha->addr); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_slave_unsync_mc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -66,43 +250,39 @@ static int dsa_slave_open(struct net_device *dev) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int err; - if (!(master->flags & IFF_UP)) - return -ENETDOWN; + err = dev_open(master, NULL); + if (err < 0) { + netdev_err(dev, "failed to open master %s\n", master->name); + goto out; + } - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { - err = dev_uc_add(master, dev->dev_addr); - if (err < 0) + if (dsa_switch_supports_uc_filtering(ds)) { + err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0); + if (err) goto out; } - if (dev->flags & IFF_ALLMULTI) { - err = dev_set_allmulti(master, 1); - if (err < 0) - goto del_unicast; - } - if (dev->flags & IFF_PROMISC) { - err = dev_set_promiscuity(master, 1); + if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { + err = dev_uc_add(master, dev->dev_addr); if (err < 0) - goto clear_allmulti; + goto del_host_addr; } err = dsa_port_enable_rt(dp, dev->phydev); if (err) - goto clear_promisc; + goto del_unicast; return 0; -clear_promisc: - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(master, -1); -clear_allmulti: - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(master, -1); del_unicast: if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); +del_host_addr: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); out: return err; } @@ -111,68 +291,97 @@ static int dsa_slave_close(struct net_device *dev) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; dsa_port_disable_rt(dp); - dev_mc_unsync(master, dev); - dev_uc_unsync(master, dev); - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(master, -1); - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(master, -1); - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + return 0; } +static void dsa_slave_manage_host_flood(struct net_device *dev) +{ + bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI); + struct dsa_port *dp = dsa_slave_to_port(dev); + bool uc = dev->flags & IFF_PROMISC; + + dsa_port_set_host_flood(dp, uc, mc); +} + static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - if (dev->flags & IFF_UP) { - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, - dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, - dev->flags & IFF_PROMISC ? 1 : -1); - } + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + + if (dsa_switch_supports_uc_filtering(ds) && + dsa_switch_supports_mc_filtering(ds)) + dsa_slave_manage_host_flood(dev); } static void dsa_slave_set_rx_mode(struct net_device *dev) { - struct net_device *master = dsa_slave_to_master(dev); - - dev_mc_sync(master, dev); - dev_uc_sync(master, dev); + __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc); + __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc); } static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; struct sockaddr *addr = a; int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + /* If the port is down, the address isn't synced yet to hardware or + * to the DSA master, so there is nothing to change. + */ if (!(dev->flags & IFF_UP)) - goto out; + goto out_change_dev_addr; + + if (dsa_switch_supports_uc_filtering(ds)) { + err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0); + if (err) + return err; + } if (!ether_addr_equal(addr->sa_data, master->dev_addr)) { err = dev_uc_add(master, addr->sa_data); if (err < 0) - return err; + goto del_unicast; } if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); -out: - ether_addr_copy(dev->dev_addr, addr->sa_data); + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + +out_change_dev_addr: + eth_hw_addr_set(dev, addr->sa_data); return 0; + +del_unicast: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0); + + return err; } struct dsa_slave_dump_ctx { @@ -267,33 +476,66 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return phylink_mii_ioctl(p->dp->pl, ifr, cmd); } -static int dsa_slave_port_attr_set(struct net_device *dev, +static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, - struct switchdev_trans *trans) + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); int ret; + if (ctx && ctx != dp) + return 0; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ret = dsa_port_set_state(dp, attr->u.stp_state, trans); + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_set_state(dp, attr->u.stp_state, true); + break; + case SWITCHDEV_ATTR_ID_PORT_MST_STATE: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_set_mst_state(dp, &attr->u.mst_state, extack); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, - trans); + extack); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_ageing_time(dp, attr->u.ageing_time); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_MST: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_mst_enable(dp, attr->u.mst, extack); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, - trans); + extack); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans); + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; - case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, trans); + case SWITCHDEV_ATTR_ID_VLAN_MSTI: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_vlan_msti(dp, &attr->u.vlan_msti); break; default: ret = -EOPNOTSUPP; @@ -303,67 +545,130 @@ static int dsa_slave_port_attr_set(struct net_device *dev, return ret; } +/* Must be called under rcu_read_lock() */ +static int +dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, + const struct switchdev_obj_port_vlan *vlan) +{ + struct net_device *upper_dev; + struct list_head *iter; + + netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) { + u16 vid; + + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + if (vid == vlan->vid) + return -EBUSY; + } + + return 0; +} + static int dsa_slave_vlan_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans) + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan; + struct switchdev_obj_port_vlan *vlan; int err; - if (obj->orig_dev != dev) + if (dsa_port_skip_vlan_configuration(dp)) { + NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); + return 0; + } + + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + /* Deny adding a bridge VLAN when there is already an 802.1Q upper with + * the same VID. + */ + if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) { + rcu_read_lock(); + err = dsa_slave_vlan_check_for_8021q_uppers(dev, vlan); + rcu_read_unlock(); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Port already has a VLAN upper with this VID"); + return err; + } + } + + return dsa_port_vlan_add(dp, vlan, extack); +} + +/* Offload a VLAN installed on the bridge or on a foreign interface by + * installing it as a VLAN towards the CPU port. + */ +static int dsa_slave_host_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_vlan vlan; + + /* Do nothing if this is a software bridge */ + if (!dp->bridge) return -EOPNOTSUPP; - if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + if (dsa_port_skip_vlan_configuration(dp)) { + NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); return 0; + } vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); - err = dsa_port_vlan_add(dp, &vlan, trans); - if (err) - return err; - - /* We need the dedicated CPU port to be a member of the VLAN as well. - * Even though drivers often handle CPU membership in special ways, + /* Even though drivers often handle CPU membership in special ways, * it doesn't make sense to program a PVID, so clear this flag. */ vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; - err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans); - if (err) - return err; - - return 0; + return dsa_port_host_vlan_add(dp, &vlan, extack); } -static int dsa_slave_port_obj_add(struct net_device *dev, +static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); int err; - /* For the prepare phase, ensure the full set of changes is feasable in - * one go in order to signal a failure properly. If an operation is not - * supported, return -EOPNOTSUPP. - */ + if (ctx && ctx != dp) + return 0; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; - err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); + + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: - /* DSA can directly translate this to a normal MDB add, - * but on the CPU port. - */ - err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_vlan_add(dev, obj, trans); + if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + err = dsa_slave_vlan_add(dev, obj, extack); + else + err = dsa_slave_host_vlan_add(dev, obj, extack); + break; + case SWITCHDEV_OBJ_ID_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj)); + break; + case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_add_ring_role(dp, + SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); break; default: err = -EOPNOTSUPP; @@ -377,39 +682,74 @@ static int dsa_slave_vlan_del(struct net_device *dev, const struct switchdev_obj *obj) { struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_vlan *vlan; + + if (dsa_port_skip_vlan_configuration(dp)) + return 0; + + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + return dsa_port_vlan_del(dp, vlan); +} + +static int dsa_slave_host_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_vlan *vlan; - if (obj->orig_dev != dev) + /* Do nothing if this is a software bridge */ + if (!dp->bridge) return -EOPNOTSUPP; - if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + if (dsa_port_skip_vlan_configuration(dp)) return 0; - /* Do not deprogram the CPU port as it may be shared with other user - * ports which can be members of this VLAN as well. - */ - return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + return dsa_port_host_vlan_del(dp, vlan); } -static int dsa_slave_port_obj_del(struct net_device *dev, +static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj) { struct dsa_port *dp = dsa_slave_to_port(dev); int err; + if (ctx && ctx != dp) + return 0; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: - /* DSA can directly translate this to a normal MDB add, - * but on the CPU port. - */ - err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_vlan_del(dev, obj); + if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + err = dsa_slave_vlan_del(dev, obj); + else + err = dsa_slave_host_vlan_del(dev, obj); + break; + case SWITCHDEV_OBJ_ID_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj)); + break; + case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_del_ring_role(dp, + SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); break; default: err = -EOPNOTSUPP; @@ -419,64 +759,31 @@ static int dsa_slave_port_obj_del(struct net_device *dev, return err; } -static int dsa_slave_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - struct dsa_switch_tree *dst = ds->dst; - - /* For non-legacy ports, devlink is used and it takes - * care of the name generation. This ndo implementation - * should be removed with legacy support. - */ - if (dp->ds->devlink) - return -EOPNOTSUPP; - - ppid->id_len = sizeof(dst->index); - memcpy(&ppid->id, &dst->index, ppid->id_len); - - return 0; -} - static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER struct dsa_slave_priv *p = netdev_priv(dev); - if (p->netpoll) - netpoll_send_skb(p->netpoll, skb); + return netpoll_send_skb(p->netpoll, skb); #else BUG(); -#endif return NETDEV_TX_OK; +#endif } static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, struct sk_buff *skb) { struct dsa_switch *ds = p->dp->ds; - struct sk_buff *clone; - unsigned int type; - type = ptp_classify_raw(skb); - if (type == PTP_CLASS_NONE) + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) return; if (!ds->ops->port_txtstamp) return; - clone = skb_clone_sk(skb); - if (!clone) - return; - - DSA_SKB_CB(skb)->clone = clone; - - if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type)) - return; - - kfree_skb(clone); + ds->ops->port_txtstamp(ds, p->dp->index, skb); } netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) @@ -497,25 +804,53 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) } EXPORT_SYMBOL_GPL(dsa_enqueue_skb); +static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) +{ + int needed_headroom = dev->needed_headroom; + int needed_tailroom = dev->needed_tailroom; + + /* For tail taggers, we need to pad short frames ourselves, to ensure + * that the tail tag does not fail at its role of being at the end of + * the packet, once the master interface pads the frame. Account for + * that pad length here, and pad later. + */ + if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) + needed_tailroom += ETH_ZLEN - skb->len; + /* skb_headroom() returns unsigned int... */ + needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); + needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); + + if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) + /* No reallocation needed, yay! */ + return 0; + + return pskb_expand_head(skb, needed_headroom, needed_tailroom, + GFP_ATOMIC); +} + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct pcpu_sw_netstats *s; struct sk_buff *nskb; - s = this_cpu_ptr(p->stats64); - u64_stats_update_begin(&s->syncp); - s->tx_packets++; - s->tx_bytes += skb->len; - u64_stats_update_end(&s->syncp); + dev_sw_netstats_tx_add(dev, 1, skb->len); - DSA_SKB_CB(skb)->clone = NULL; + memset(skb->cb, 0, sizeof(skb->cb)); - /* Identify PTP protocol packets, clone them, and pass them to the - * switch driver - */ + /* Handle tx timestamp if any */ dsa_skb_tx_timestamp(p, skb); + if (dsa_realloc_skb(skb, dev)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* needed_tailroom should still be 'warm' in the cache line from + * dsa_realloc_skb(), which has also ensured that padding is safe. + */ + if (dev->needed_tailroom) + eth_skb_pad(skb); + /* Transmit function may have to reallocate the original SKB, * in which case it must have freed it. Only free it here on error. */ @@ -533,9 +868,9 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { - strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); - strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); - strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); + strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); + strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } static int dsa_slave_get_regs_len(struct net_device *dev) @@ -620,7 +955,10 @@ static void dsa_slave_get_strings(struct net_device *dev, if (ds->ops->get_strings) ds->ops->get_strings(ds, dp->index, stringset, data + 4 * len); + } else if (stringset == ETH_SS_TEST) { + net_selftest_get_strings(data); } + } static void dsa_slave_get_ethtool_stats(struct net_device *dev, @@ -628,7 +966,6 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, uint64_t *data) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = dp->ds; struct pcpu_sw_netstats *s; unsigned int start; @@ -637,13 +974,13 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, for_each_possible_cpu(i) { u64 tx_packets, tx_bytes, rx_packets, rx_bytes; - s = per_cpu_ptr(p->stats64, i); + s = per_cpu_ptr(dev->tstats, i); do { start = u64_stats_fetch_begin_irq(&s->syncp); - tx_packets = s->tx_packets; - tx_bytes = s->tx_bytes; - rx_packets = s->rx_packets; - rx_bytes = s->rx_bytes; + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); } while (u64_stats_fetch_retry_irq(&s->syncp, start)); data[0] += tx_packets; data[1] += tx_bytes; @@ -660,18 +997,79 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = dp->ds; if (sset == ETH_SS_STATS) { - int count; + int count = 0; - count = 4; - if (ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds, dp->index, sset); + if (ds->ops->get_sset_count) { + count = ds->ops->get_sset_count(ds, dp->index, sset); + if (count < 0) + return count; + } - return count; + return count + 4; + } else if (sset == ETH_SS_TEST) { + return net_selftest_get_count(); } return -EOPNOTSUPP; } +static void dsa_slave_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_phy_stats) + ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats); +} + +static void dsa_slave_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_mac_stats) + ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats); +} + +static void +dsa_slave_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_ctrl_stats) + ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); +} + +static void +dsa_slave_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_rmon_stats) + ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges); +} + +static void dsa_slave_net_selftest(struct net_device *ndev, + struct ethtool_test *etest, u64 *buf) +{ + struct dsa_port *dp = dsa_slave_to_port(ndev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->self_test) { + ds->ops->self_test(ds, dp->index, etest, buf); + return; + } + + net_selftest(ndev, etest, buf); +} + static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -753,6 +1151,16 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev, return phylink_ethtool_ksettings_set(dp->pl, cmd); } +static void dsa_slave_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_pause_stats) + ds->ops->get_pause_stats(ds, dp->index, pause_stats); +} + static void dsa_slave_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { @@ -811,24 +1219,6 @@ static void dsa_slave_poll_controller(struct net_device *dev) } #endif -static int dsa_slave_get_phys_port_name(struct net_device *dev, - char *name, size_t len) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - /* For non-legacy ports, devlink is used and it takes - * care of the name generation. This ndo implementation - * should be removed with legacy support. - */ - if (dp->ds->devlink) - return -EOPNOTSUPP; - - if (snprintf(name, len, "p%d", dp->index) >= len) - return -EINVAL; - - return 0; -} - static struct dsa_mall_tc_entry * dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) { @@ -842,59 +1232,136 @@ dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) return NULL; } -static int dsa_slave_add_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) +static int +dsa_slave_add_cls_matchall_mirred(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) { + struct netlink_ext_ack *extack = cls->common.extack; struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_mall_mirror_tc_entry *mirror; struct dsa_mall_tc_entry *mall_tc_entry; - __be16 protocol = cls->common.protocol; struct dsa_switch *ds = dp->ds; struct flow_action_entry *act; struct dsa_port *to_dp; - int err = -EOPNOTSUPP; + int err; if (!ds->ops->port_mirror_add) - return err; + return -EOPNOTSUPP; - if (!flow_offload_has_one_action(&cls->rule->action)) - return err; + if (!flow_action_basic_hw_stats_check(&cls->rule->action, + cls->common.extack)) + return -EOPNOTSUPP; act = &cls->rule->action.entries[0]; - if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) { - struct dsa_mall_mirror_tc_entry *mirror; + if (!act->dev) + return -EINVAL; - if (!act->dev) - return -EINVAL; + if (!dsa_slave_dev_check(act->dev)) + return -EOPNOTSUPP; - if (!dsa_slave_dev_check(act->dev)) - return -EOPNOTSUPP; + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; - mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); - if (!mall_tc_entry) - return -ENOMEM; + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = DSA_PORT_MALL_MIRROR; + mirror = &mall_tc_entry->mirror; - mall_tc_entry->cookie = cls->cookie; - mall_tc_entry->type = DSA_PORT_MALL_MIRROR; - mirror = &mall_tc_entry->mirror; + to_dp = dsa_slave_to_port(act->dev); - to_dp = dsa_slave_to_port(act->dev); + mirror->to_local_port = to_dp->index; + mirror->ingress = ingress; - mirror->to_local_port = to_dp->index; - mirror->ingress = ingress; + err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress, extack); + if (err) { + kfree(mall_tc_entry); + return err; + } - err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress); - if (err) { - kfree(mall_tc_entry); - return err; + list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); + + return err; +} + +static int +dsa_slave_add_cls_matchall_police(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_mall_policer_tc_entry *policer; + struct dsa_mall_tc_entry *mall_tc_entry; + struct dsa_switch *ds = dp->ds; + struct flow_action_entry *act; + int err; + + if (!ds->ops->port_policer_add) { + NL_SET_ERR_MSG_MOD(extack, + "Policing offload not implemented"); + return -EOPNOTSUPP; + } + + if (!ingress) { + NL_SET_ERR_MSG_MOD(extack, + "Only supported on ingress qdisc"); + return -EOPNOTSUPP; + } + + if (!flow_action_basic_hw_stats_check(&cls->rule->action, + cls->common.extack)) + return -EOPNOTSUPP; + + list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) { + if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) { + NL_SET_ERR_MSG_MOD(extack, + "Only one port policer allowed"); + return -EEXIST; } + } + + act = &cls->rule->action.entries[0]; + + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = DSA_PORT_MALL_POLICER; + policer = &mall_tc_entry->policer; + policer->rate_bytes_per_sec = act->police.rate_bytes_ps; + policer->burst = act->police.burst; - list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); + err = ds->ops->port_policer_add(ds, dp->index, policer); + if (err) { + kfree(mall_tc_entry); + return err; } - return 0; + list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); + + return err; +} + +static int dsa_slave_add_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + int err = -EOPNOTSUPP; + + if (cls->common.protocol == htons(ETH_P_ALL) && + flow_offload_has_one_action(&cls->rule->action) && + cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED) + err = dsa_slave_add_cls_matchall_mirred(dev, cls, ingress); + else if (flow_offload_has_one_action(&cls->rule->action) && + cls->rule->action.entries[0].id == FLOW_ACTION_POLICE) + err = dsa_slave_add_cls_matchall_police(dev, cls, ingress); + + return err; } static void dsa_slave_del_cls_matchall(struct net_device *dev, @@ -904,9 +1371,6 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, struct dsa_mall_tc_entry *mall_tc_entry; struct dsa_switch *ds = dp->ds; - if (!ds->ops->port_mirror_del) - return; - mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie); if (!mall_tc_entry) return; @@ -915,7 +1379,13 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, switch (mall_tc_entry->type) { case DSA_PORT_MALL_MIRROR: - ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror); + if (ds->ops->port_mirror_del) + ds->ops->port_mirror_del(ds, dp->index, + &mall_tc_entry->mirror); + break; + case DSA_PORT_MALL_POLICER: + if (ds->ops->port_policer_del) + ds->ops->port_policer_del(ds, dp->index); break; default: WARN_ON(1); @@ -942,6 +1412,64 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, } } +static int dsa_slave_add_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_add) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_add(ds, port, cls, ingress); +} + +static int dsa_slave_del_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_del) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_del(ds, port, cls, ingress); +} + +static int dsa_slave_stats_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_stats) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_stats(ds, port, cls, ingress); +} + +static int dsa_slave_setup_tc_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + switch (cls->command) { + case FLOW_CLS_REPLACE: + return dsa_slave_add_cls_flower(dev, cls, ingress); + case FLOW_CLS_DESTROY: + return dsa_slave_del_cls_flower(dev, cls, ingress); + case FLOW_CLS_STATS: + return dsa_slave_stats_cls_flower(dev, cls, ingress); + default: + return -EOPNOTSUPP; + } +} + static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv, bool ingress) { @@ -953,6 +1481,8 @@ static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSMATCHALL: return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); + case TC_SETUP_CLSFLOWER: + return dsa_slave_setup_tc_cls_flower(dev, type_data, ingress); default: return -EOPNOTSUPP; } @@ -1012,14 +1542,31 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, } } +static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port, + void *type_data) +{ + struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port)); + + if (!master->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data); +} + static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_switch *ds = dp->ds; - if (type == TC_SETUP_BLOCK) + switch (type) { + case TC_SETUP_BLOCK: return dsa_slave_setup_tc_block(dev, type_data); + case TC_SETUP_FT: + return dsa_slave_setup_ft_block(ds, dp->index, type_data); + default: + break; + } if (!ds->ops->port_setup_tc) return -EOPNOTSUPP; @@ -1027,34 +1574,6 @@ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, return ds->ops->port_setup_tc(ds, dp->index, type, type_data); } -static void dsa_slave_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct pcpu_sw_netstats *s; - unsigned int start; - int i; - - netdev_stats_to_stats64(stats, &dev->stats); - for_each_possible_cpu(i) { - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; - - s = per_cpu_ptr(p->stats64, i); - do { - start = u64_stats_fetch_begin_irq(&s->syncp); - tx_packets = s->tx_packets; - tx_bytes = s->tx_bytes; - rx_packets = s->rx_packets; - rx_bytes = s->rx_bytes; - } while (u64_stats_fetch_retry_irq(&s->syncp, start)); - - stats->tx_packets += tx_packets; - stats->tx_bytes += tx_bytes; - stats->rx_packets += rx_packets; - stats->rx_bytes += rx_bytes; - } -} - static int dsa_slave_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { @@ -1095,32 +1614,31 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct bridge_vlan_info info; + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid = vid, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; + struct netlink_ext_ack extack = {0}; int ret; - /* Check for a possible bridge VLAN entry now since there is no - * need to emulate the switchdev prepare + commit phase. - */ - if (dp->bridge_dev) { - if (!br_vlan_enabled(dp->bridge_dev)) - return 0; - - /* br_vlan_get_info() returns -EINVAL or -ENOENT if the - * device, respectively the VID is not found, returning - * 0 means success, which is a failure for us here. - */ - ret = br_vlan_get_info(dp->bridge_dev, vid, &info); - if (ret == 0) - return -EBUSY; - } - - ret = dsa_port_vid_add(dp, vid, 0); - if (ret) + /* User port... */ + ret = dsa_port_vlan_add(dp, &vlan, &extack); + if (ret) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); return ret; + } - ret = dsa_port_vid_add(dp->cpu_dp, vid, 0); - if (ret) + /* And CPU port... */ + ret = dsa_port_host_vlan_add(dp, &vlan, &extack); + if (ret) { + if (extack._msg) + netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index, + extack._msg); return ret; + } return 0; } @@ -1129,29 +1647,486 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct bridge_vlan_info info; - int ret; + struct switchdev_obj_port_vlan vlan = { + .vid = vid, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; + int err; + + err = dsa_port_vlan_del(dp, &vlan); + if (err) + return err; - /* Check for a possible bridge VLAN entry now since there is no - * need to emulate the switchdev prepare + commit phase. + return dsa_port_host_vlan_del(dp, &vlan); +} + +static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); + + return dsa_slave_vlan_rx_add_vid(arg, proto, vid); +} + +static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); + + return dsa_slave_vlan_rx_kill_vid(arg, proto, vid); +} + +/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN + * filtering is enabled. The baseline is that only ports that offload a + * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware, + * but there are exceptions for quirky hardware. + * + * If ds->vlan_filtering_is_global = true, then standalone ports which share + * the same switch with other ports that offload a VLAN-aware bridge are also + * inevitably VLAN-aware. + * + * To summarize, a DSA switch port offloads: + * + * - If standalone (this includes software bridge, software LAG): + * - if ds->needs_standalone_vlan_filtering = true, OR if + * (ds->vlan_filtering_is_global = true AND there are bridges spanning + * this switch chip which have vlan_filtering=1) + * - the 8021q upper VLANs + * - else (standalone VLAN filtering is not needed, VLAN filtering is not + * global, or it is, but no port is under a VLAN-aware bridge): + * - no VLAN (any 8021q upper is a software VLAN) + * + * - If under a vlan_filtering=0 bridge which it offload: + * - if ds->configure_vlan_while_not_filtering = true (default): + * - the bridge VLANs. These VLANs are committed to hardware but inactive. + * - else (deprecated): + * - no VLAN. The bridge VLANs are not restored when VLAN awareness is + * enabled, so this behavior is broken and discouraged. + * + * - If under a vlan_filtering=1 bridge which it offload: + * - the bridge VLANs + * - the 8021q upper VLANs + */ +int dsa_slave_manage_vlan_filtering(struct net_device *slave, + bool vlan_filtering) +{ + int err; + + if (vlan_filtering) { + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + err = vlan_for_each(slave, dsa_slave_restore_vlan, slave); + if (err) { + vlan_for_each(slave, dsa_slave_clear_vlan, slave); + slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + return err; + } + } else { + err = vlan_for_each(slave, dsa_slave_clear_vlan, slave); + if (err) + return err; + + slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + } + + return 0; +} + +struct dsa_hw_port { + struct list_head list; + struct net_device *dev; + int old_mtu; +}; + +static int dsa_hw_port_list_set_mtu(struct list_head *hw_port_list, int mtu) +{ + const struct dsa_hw_port *p; + int err; + + list_for_each_entry(p, hw_port_list, list) { + if (p->dev->mtu == mtu) + continue; + + err = dev_set_mtu(p->dev, mtu); + if (err) + goto rollback; + } + + return 0; + +rollback: + list_for_each_entry_continue_reverse(p, hw_port_list, list) { + if (p->dev->mtu == p->old_mtu) + continue; + + if (dev_set_mtu(p->dev, p->old_mtu)) + netdev_err(p->dev, "Failed to restore MTU\n"); + } + + return err; +} + +static void dsa_hw_port_list_free(struct list_head *hw_port_list) +{ + struct dsa_hw_port *p, *n; + + list_for_each_entry_safe(p, n, hw_port_list, list) + kfree(p); +} + +/* Make the hardware datapath to/from @dev limited to a common MTU */ +static void dsa_bridge_mtu_normalization(struct dsa_port *dp) +{ + struct list_head hw_port_list; + struct dsa_switch_tree *dst; + int min_mtu = ETH_MAX_MTU; + struct dsa_port *other_dp; + int err; + + if (!dp->ds->mtu_enforcement_ingress) + return; + + if (!dp->bridge) + return; + + INIT_LIST_HEAD(&hw_port_list); + + /* Populate the list of ports that are part of the same bridge + * as the newly added/modified port */ - if (dp->bridge_dev) { - if (!br_vlan_enabled(dp->bridge_dev)) - return 0; + list_for_each_entry(dst, &dsa_tree_list, list) { + list_for_each_entry(other_dp, &dst->ports, list) { + struct dsa_hw_port *hw_port; + struct net_device *slave; + + if (other_dp->type != DSA_PORT_TYPE_USER) + continue; + + if (!dsa_port_bridge_same(dp, other_dp)) + continue; + + if (!other_dp->ds->mtu_enforcement_ingress) + continue; + + slave = other_dp->slave; + + if (min_mtu > slave->mtu) + min_mtu = slave->mtu; + + hw_port = kzalloc(sizeof(*hw_port), GFP_KERNEL); + if (!hw_port) + goto out; - /* br_vlan_get_info() returns -EINVAL or -ENOENT if the - * device, respectively the VID is not found, returning - * 0 means success, which is a failure for us here. + hw_port->dev = slave; + hw_port->old_mtu = slave->mtu; + + list_add(&hw_port->list, &hw_port_list); + } + } + + /* Attempt to configure the entire hardware bridge to the newly added + * interface's MTU first, regardless of whether the intention of the + * user was to raise or lower it. + */ + err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->slave->mtu); + if (!err) + goto out; + + /* Clearly that didn't work out so well, so just set the minimum MTU on + * all hardware bridge ports now. If this fails too, then all ports will + * still have their old MTU rolled back anyway. + */ + dsa_hw_port_list_set_mtu(&hw_port_list, min_mtu); + +out: + dsa_hw_port_list_free(&hw_port_list); +} + +int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) +{ + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; + int largest_mtu = 0; + int new_master_mtu; + int old_master_mtu; + int mtu_limit; + int cpu_mtu; + int err; + + if (!ds->ops->port_change_mtu) + return -EOPNOTSUPP; + + dsa_tree_for_each_user_port(other_dp, ds->dst) { + int slave_mtu; + + /* During probe, this function will be called for each slave + * device, while not all of them have been allocated. That's + * ok, it doesn't change what the maximum is, so ignore it. */ - ret = br_vlan_get_info(dp->bridge_dev, vid, &info); - if (ret == 0) - return -EBUSY; + if (!other_dp->slave) + continue; + + /* Pretend that we already applied the setting, which we + * actually haven't (still haven't done all integrity checks) + */ + if (dp == other_dp) + slave_mtu = new_mtu; + else + slave_mtu = other_dp->slave->mtu; + + if (largest_mtu < slave_mtu) + largest_mtu = slave_mtu; } - /* Do not deprogram the CPU port as it may be shared with other user - * ports which can be members of this VLAN as well. + mtu_limit = min_t(int, master->max_mtu, dev->max_mtu); + old_master_mtu = master->mtu; + new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops); + if (new_master_mtu > mtu_limit) + return -ERANGE; + + /* If the master MTU isn't over limit, there's no need to check the CPU + * MTU, since that surely isn't either. */ - return dsa_port_vid_del(dp, vid); + cpu_mtu = largest_mtu; + + /* Start applying stuff */ + if (new_master_mtu != old_master_mtu) { + err = dev_set_mtu(master, new_master_mtu); + if (err < 0) + goto out_master_failed; + + /* We only need to propagate the MTU of the CPU port to + * upstream switches, so emit a notifier which updates them. + */ + err = dsa_port_mtu_change(cpu_dp, cpu_mtu); + if (err) + goto out_cpu_failed; + } + + err = ds->ops->port_change_mtu(ds, dp->index, new_mtu); + if (err) + goto out_port_failed; + + dev->mtu = new_mtu; + + dsa_bridge_mtu_normalization(dp); + + return 0; + +out_port_failed: + if (new_master_mtu != old_master_mtu) + dsa_port_mtu_change(cpu_dp, old_master_mtu - + dsa_tag_protocol_overhead(cpu_dp->tag_ops)); +out_cpu_failed: + if (new_master_mtu != old_master_mtu) + dev_set_mtu(master, old_master_mtu); +out_master_failed: + return err; +} + +static int __maybe_unused +dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + + if (!ds->ops->port_set_default_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = __fls(mask); + + err = ds->ops->port_set_default_prio(ds, port, new_prio); + if (err) { + dcb_ieee_delapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused +dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + u8 dscp = app->protocol; + + if (!ds->ops->port_add_dscp_prio) + return -EOPNOTSUPP; + + if (dscp >= 64) { + netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", + dscp); + return -EINVAL; + } + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = __fls(mask); + + err = ds->ops->port_add_dscp_prio(ds, port, dscp, new_prio); + if (err) { + dcb_ieee_delapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused dsa_slave_dcbnl_ieee_setapp(struct net_device *dev, + struct dcb_app *app) +{ + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + switch (app->protocol) { + case 0: + return dsa_slave_dcbnl_set_default_prio(dev, app); + default: + return -EOPNOTSUPP; + } + break; + case IEEE_8021QAZ_APP_SEL_DSCP: + return dsa_slave_dcbnl_add_dscp_prio(dev, app); + default: + return -EOPNOTSUPP; + } +} + +static int __maybe_unused +dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + + if (!ds->ops->port_set_default_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = mask ? __fls(mask) : 0; + + err = ds->ops->port_set_default_prio(ds, port, new_prio); + if (err) { + dcb_ieee_setapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused +dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int err, port = dp->index; + u8 dscp = app->protocol; + + if (!ds->ops->port_del_dscp_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + err = ds->ops->port_del_dscp_prio(ds, port, dscp, app->priority); + if (err) { + dcb_ieee_setapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev, + struct dcb_app *app) +{ + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + switch (app->protocol) { + case 0: + return dsa_slave_dcbnl_del_default_prio(dev, app); + default: + return -EOPNOTSUPP; + } + break; + case IEEE_8021QAZ_APP_SEL_DSCP: + return dsa_slave_dcbnl_del_dscp_prio(dev, app); + default: + return -EOPNOTSUPP; + } +} + +/* Pre-populate the DCB application priority table with the priorities + * configured during switch setup, which we read from hardware here. + */ +static int dsa_slave_dcbnl_init(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err; + + if (ds->ops->port_get_default_prio) { + int prio = ds->ops->port_get_default_prio(ds, port); + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, + .protocol = 0, + .priority = prio, + }; + + if (prio < 0) + return prio; + + err = dcb_ieee_setapp(dev, &app); + if (err) + return err; + } + + if (ds->ops->port_get_dscp_prio) { + int protocol; + + for (protocol = 0; protocol < 64; protocol++) { + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_DSCP, + .protocol = protocol, + }; + int prio; + + prio = ds->ops->port_get_dscp_prio(ds, port, protocol); + if (prio == -EOPNOTSUPP) + continue; + if (prio < 0) + return prio; + + app.priority = prio; + + err = dcb_ieee_setapp(dev, &app); + if (err) + return err; + } + } + + return 0; } static const struct ethtool_ops dsa_slave_ethtool_ops = { @@ -1166,45 +2141,63 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_strings = dsa_slave_get_strings, .get_ethtool_stats = dsa_slave_get_ethtool_stats, .get_sset_count = dsa_slave_get_sset_count, + .get_eth_phy_stats = dsa_slave_get_eth_phy_stats, + .get_eth_mac_stats = dsa_slave_get_eth_mac_stats, + .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats, + .get_rmon_stats = dsa_slave_get_rmon_stats, .set_wol = dsa_slave_set_wol, .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, .get_link_ksettings = dsa_slave_get_link_ksettings, .set_link_ksettings = dsa_slave_set_link_ksettings, + .get_pause_stats = dsa_slave_get_pause_stats, .get_pauseparam = dsa_slave_get_pauseparam, .set_pauseparam = dsa_slave_set_pauseparam, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, .get_ts_info = dsa_slave_get_ts_info, + .self_test = dsa_slave_net_selftest, }; -/* legacy way, bypassing the bridge *****************************************/ -int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 flags, - struct netlink_ext_ack *extack) +static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = { + .ieee_setapp = dsa_slave_dcbnl_ieee_setapp, + .ieee_delapp = dsa_slave_dcbnl_ieee_delapp, +}; + +static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - return dsa_port_fdb_add(dp, addr, vid); + return &dp->devlink_port; } -int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) +static void dsa_slave_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) { struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; - return dsa_port_fdb_del(dp, addr, vid); + if (ds->ops->get_stats64) + ds->ops->get_stats64(ds, dp->index, s); + else + dev_get_tstats64(dev, s); } -static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) +static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx, + struct net_device_path *path) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_slave_to_port(ctx->dev); + struct net_device *master = dsa_port_to_master(dp); + struct dsa_port *cpu_dp = dp->cpu_dp; + + path->dev = ctx->dev; + path->type = DEV_PATH_DSA; + path->dsa.proto = cpu_dp->tag_ops->proto; + path->dsa.port = dp->index; + ctx->dev = master; - return dp->ds->devlink ? &dp->devlink_port : NULL; + return 0; } static const struct net_device_ops dsa_slave_netdev_ops = { @@ -1214,23 +2207,21 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_add = dsa_legacy_fdb_add, - .ndo_fdb_del = dsa_legacy_fdb_del, .ndo_fdb_dump = dsa_slave_fdb_dump, - .ndo_do_ioctl = dsa_slave_ioctl, + .ndo_eth_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = dsa_slave_netpoll_setup, .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup, .ndo_poll_controller = dsa_slave_poll_controller, #endif - .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, .ndo_get_stats64 = dsa_slave_get_stats64, - .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, .ndo_get_devlink_port = dsa_slave_get_devlink_port, + .ndo_change_mtu = dsa_slave_change_mtu, + .ndo_fill_forward_path = dsa_slave_fill_forward_path, }; static struct device_type dsa_type = { @@ -1241,14 +2232,15 @@ void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up) { const struct dsa_port *dp = dsa_to_port(ds, port); - phylink_mac_change(dp->pl, up); + if (dp->pl) + phylink_mac_change(dp->pl, up); } EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change); -static void dsa_slave_phylink_fixed_state(struct net_device *dev, +static void dsa_slave_phylink_fixed_state(struct phylink_config *config, struct phylink_link_state *state) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; /* No need to check that this operation is valid, the callback would @@ -1258,7 +2250,8 @@ static void dsa_slave_phylink_fixed_state(struct net_device *dev, } /* slave device setup *******************************************************/ -static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) +static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr, + u32 flags) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_switch *ds = dp->ds; @@ -1269,6 +2262,8 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) return -ENODEV; } + slave_dev->phydev->dev_flags |= flags; + return phylink_connect_phy(dp->pl, slave_dev->phydev); } @@ -1277,31 +2272,24 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct device_node *port_dn = dp->dn; struct dsa_switch *ds = dp->ds; - phy_interface_t mode; u32 phy_flags = 0; int ret; - ret = of_get_phy_mode(port_dn, &mode); - if (ret) - mode = PHY_INTERFACE_MODE_NA; - dp->pl_config.dev = &slave_dev->dev; dp->pl_config.type = PHYLINK_NETDEV; - dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode, - &dsa_port_phylink_mac_ops); - if (IS_ERR(dp->pl)) { - netdev_err(slave_dev, - "error creating PHYLINK: %ld\n", PTR_ERR(dp->pl)); - return PTR_ERR(dp->pl); + /* The get_fixed_state callback takes precedence over polling the + * link GPIO in PHYLINK (see phylink_get_fixed_state). Only set + * this if the switch provides such a callback. + */ + if (ds->ops->phylink_fixed_state) { + dp->pl_config.get_fixed_state = dsa_slave_phylink_fixed_state; + dp->pl_config.poll_fixed_state = true; } - /* Register only if the switch provides such a callback, since this - * callback takes precedence over polling the link GPIO in PHYLINK - * (see phylink_get_fixed_state). - */ - if (ds->ops->phylink_fixed_state) - phylink_fixed_state_cb(dp->pl, dsa_slave_phylink_fixed_state); + ret = dsa_port_phylink_create(dp); + if (ret) + return ret; if (ds->ops->get_phy_flags) phy_flags = ds->ops->get_phy_flags(ds, dp->index); @@ -1311,19 +2299,45 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) /* We could not connect to a designated PHY or SFP, so try to * use the switch internal MDIO bus instead */ - ret = dsa_slave_phy_connect(slave_dev, dp->index); - if (ret) { - netdev_err(slave_dev, - "failed to connect to port %d: %d\n", - dp->index, ret); - phylink_destroy(dp->pl); - return ret; - } + ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags); + } + if (ret) { + netdev_err(slave_dev, "failed to connect to PHY: %pe\n", + ERR_PTR(ret)); + dsa_port_phylink_destroy(dp); } return ret; } +void dsa_slave_setup_tagger(struct net_device *slave) +{ + struct dsa_port *dp = dsa_slave_to_port(slave); + struct net_device *master = dsa_port_to_master(dp); + struct dsa_slave_priv *p = netdev_priv(slave); + const struct dsa_port *cpu_dp = dp->cpu_dp; + const struct dsa_switch *ds = dp->ds; + + slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; + slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; + /* Try to save one extra realloc later in the TX path (in the master) + * by also inheriting the master's needed headroom and tailroom. + * The 8021q driver also does this. + */ + slave->needed_headroom += master->needed_headroom; + slave->needed_tailroom += master->needed_tailroom; + + p->xmit = cpu_dp->tag_ops->xmit; + + slave->features = master->vlan_features | NETIF_F_HW_TC; + slave->hw_features |= NETIF_F_HW_TC; + slave->features |= NETIF_F_LLTX; + if (slave->needed_tailroom) + slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); + if (ds->needs_standalone_vlan_filtering) + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; +} + int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); @@ -1356,24 +2370,9 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } -static void dsa_slave_notify(struct net_device *dev, unsigned long val) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_notifier_register_info rinfo = { - .switch_number = dp->ds->index, - .port_number = dp->index, - .master = master, - .info.dev = dev, - }; - - call_dsa_notifiers(val, dev, &rinfo.info); -} - int dsa_slave_create(struct dsa_port *port) { - const struct dsa_port *cpu_dp = port->cpu_dp; - struct net_device *master = cpu_dp->master; + struct net_device *master = dsa_port_to_master(port); struct dsa_switch *ds = port->ds; const char *name = port->name; struct net_device *slave_dev; @@ -1389,19 +2388,21 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features | NETIF_F_HW_TC; - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - slave_dev->hw_features |= NETIF_F_HW_TC; + slave_dev->rtnl_link_ops = &dsa_link_ops; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; - if (!IS_ERR_OR_NULL(port->mac)) - ether_addr_copy(slave_dev->dev_addr, port->mac); +#if IS_ENABLED(CONFIG_DCB) + slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops; +#endif + if (!is_zero_ether_addr(port->mac)) + eth_hw_addr_set(slave_dev, port->mac); else eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; + if (dsa_switch_supports_uc_filtering(ds)) + slave_dev->priv_flags |= IFF_UNICAST_FLT; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->min_mtu = 0; - slave_dev->max_mtu = ETH_MAX_MTU; + if (ds->ops->port_max_mtu) + slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); SET_NETDEV_DEV(slave_dev, port->ds->dev); @@ -1409,42 +2410,77 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!p->stats64) { + slave_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!slave_dev->tstats) { free_netdev(slave_dev); return -ENOMEM; } + + ret = gro_cells_init(&p->gcells, slave_dev); + if (ret) + goto out_free; + p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); - p->xmit = cpu_dp->tag_ops->xmit; port->slave = slave_dev; + dsa_slave_setup_tagger(slave_dev); netif_carrier_off(slave_dev); ret = dsa_slave_phy_setup(slave_dev); if (ret) { - netdev_err(master, "error %d setting up slave phy\n", ret); - goto out_free; + netdev_err(slave_dev, + "error %d setting up PHY for tree %d, switch %d, port %d\n", + ret, ds->dst->index, ds->index, port->index); + goto out_gcells; } - dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); + rtnl_lock(); + + ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); + if (ret && ret != -EOPNOTSUPP) + dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", + ret, ETH_DATA_LEN, port->index); - ret = register_netdev(slave_dev); + ret = register_netdevice(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); + rtnl_unlock(); goto out_phy; } + if (IS_ENABLED(CONFIG_DCB)) { + ret = dsa_slave_dcbnl_init(slave_dev); + if (ret) { + netdev_err(slave_dev, + "failed to initialize DCB: %pe\n", + ERR_PTR(ret)); + rtnl_unlock(); + goto out_unregister; + } + } + + ret = netdev_upper_dev_link(master, slave_dev, NULL); + + rtnl_unlock(); + + if (ret) + goto out_unregister; + return 0; +out_unregister: + unregister_netdev(slave_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); rtnl_unlock(); - phylink_destroy(p->dp->pl); + dsa_port_phylink_destroy(p->dp); +out_gcells: + gro_cells_destroy(&p->gcells); out_free: - free_percpu(p->stats64); + free_percpu(slave_dev->tstats); free_netdev(slave_dev); port->slave = NULL; return ret; @@ -1452,51 +2488,252 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { + struct net_device *master = dsa_slave_to_master(slave_dev); struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); netif_carrier_off(slave_dev); rtnl_lock(); + netdev_upper_dev_unlink(master, slave_dev); + unregister_netdevice(slave_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); - dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); - unregister_netdev(slave_dev); - phylink_destroy(dp->pl); - free_percpu(p->stats64); + dsa_port_phylink_destroy(dp); + gro_cells_destroy(&p->gcells); + free_percpu(slave_dev->tstats); free_netdev(slave_dev); } +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack) +{ + struct net_device *old_master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct net_device *upper; + struct list_head *iter; + int err; + + if (master == old_master) + return 0; + + if (!ds->ops->port_change_master) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support changing DSA master"); + return -EOPNOTSUPP; + } + + if (!netdev_uses_dsa(master)) { + NL_SET_ERR_MSG_MOD(extack, + "Interface not eligible as DSA master"); + return -EOPNOTSUPP; + } + + netdev_for_each_upper_dev_rcu(master, upper, iter) { + if (dsa_slave_dev_check(upper)) + continue; + if (netif_is_bridge_master(upper)) + continue; + NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers"); + return -EOPNOTSUPP; + } + + /* Since we allow live-changing the DSA master, plus we auto-open the + * DSA master when the user port opens => we need to ensure that the + * new DSA master is open too. + */ + if (dev->flags & IFF_UP) { + err = dev_open(master, extack); + if (err) + return err; + } + + netdev_upper_dev_unlink(old_master, dev); + + err = netdev_upper_dev_link(master, dev, extack); + if (err) + goto out_revert_old_master_unlink; + + err = dsa_port_change_master(dp, master, extack); + if (err) + goto out_revert_master_link; + + /* Update the MTU of the new CPU port through cross-chip notifiers */ + err = dsa_slave_change_mtu(dev, dev->mtu); + if (err && err != -EOPNOTSUPP) { + netdev_warn(dev, + "nonfatal error updating MTU with new master: %pe\n", + ERR_PTR(err)); + } + + /* If the port doesn't have its own MAC address and relies on the DSA + * master's one, inherit it again from the new DSA master. + */ + if (is_zero_ether_addr(dp->mac)) + eth_hw_addr_inherit(dev, master); + + return 0; + +out_revert_master_link: + netdev_upper_dev_unlink(master, dev); +out_revert_old_master_unlink: + netdev_upper_dev_link(old_master, dev, NULL); + return err; +} + bool dsa_slave_dev_check(const struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; } +EXPORT_SYMBOL_GPL(dsa_slave_dev_check); static int dsa_slave_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct dsa_port *dp = dsa_slave_to_port(dev); + struct netlink_ext_ack *extack; int err = NOTIFY_DONE; + if (!dsa_slave_dev_check(dev)) + return err; + + extack = netdev_notifier_info_to_extack(&info->info); + if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) { - err = dsa_port_bridge_join(dp, info->upper_dev); + err = dsa_port_bridge_join(dp, info->upper_dev, extack); + if (!err) + dsa_bridge_mtu_normalization(dp); + if (err == -EOPNOTSUPP) { + if (extack && !extack->_msg) + NL_SET_ERR_MSG_MOD(extack, + "Offloading not supported"); + err = 0; + } err = notifier_from_errno(err); } else { dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } + } else if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_lag_join(dp, info->upper_dev, + info->upper_info, extack); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_lag_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } else if (is_hsr_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_hsr_join(dp, info->upper_dev); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_hsr_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } } return err; } -static int dsa_slave_upper_vlan_check(struct net_device *dev, - struct netdev_notifier_changeupper_info * - info) +static int dsa_slave_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + if (!dsa_slave_dev_check(dev)) + return NOTIFY_DONE; + + if (netif_is_bridge_master(info->upper_dev) && !info->linking) + dsa_port_pre_bridge_leave(dp, info->upper_dev); + else if (netif_is_lag_master(info->upper_dev) && !info->linking) + dsa_port_pre_lag_leave(dp, info->upper_dev); + /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be + * meaningfully enslaved to a bridge yet + */ + + return NOTIFY_DONE; +} + +static int +dsa_slave_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + if (!netif_is_lag_master(dev)) + return err; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + dp = dsa_slave_to_port(lower); + if (!dp->lag) + /* Software LAG */ + continue; + + err = dsa_slave_changeupper(lower, info); + if (notifier_to_errno(err)) + break; + } + + return err; +} + +/* Same as dsa_slave_lag_changeupper() except that it calls + * dsa_slave_prechangeupper() + */ +static int +dsa_slave_lag_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + if (!netif_is_lag_master(dev)) + return err; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + dp = dsa_slave_to_port(lower); + if (!dp->lag) + /* Software LAG */ + continue; + + err = dsa_slave_prechangeupper(lower, info); + if (notifier_to_errno(err)) + break; + } + + return err; +} + +static int +dsa_prevent_bridging_8021q_upper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *ext_ack; - struct net_device *slave; + struct net_device *slave, *br; struct dsa_port *dp; ext_ack = netdev_notifier_info_to_extack(&info->info); @@ -1509,11 +2746,12 @@ static int dsa_slave_upper_vlan_check(struct net_device *dev, return NOTIFY_DONE; dp = dsa_slave_to_port(slave); - if (!dp->bridge_dev) + br = dsa_port_bridge_dev_get(dp); + if (!br) return NOTIFY_DONE; /* Deny enslaving a VLAN device into a VLAN-aware bridge */ - if (br_vlan_enabled(dp->bridge_dev) && + if (br_vlan_enabled(br) && netif_is_bridge_master(info->upper_dev) && info->linking) { NL_SET_ERR_MSG_MOD(ext_ack, "Cannot enslave VLAN device into VLAN aware bridge"); @@ -1523,86 +2761,617 @@ static int dsa_slave_upper_vlan_check(struct net_device *dev, return NOTIFY_DONE; } +static int +dsa_slave_check_8021q_upper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct bridge_vlan_info br_info; + struct netlink_ext_ack *extack; + int err = NOTIFY_DONE; + u16 vid; + + if (!br || !br_vlan_enabled(br)) + return NOTIFY_DONE; + + extack = netdev_notifier_info_to_extack(&info->info); + vid = vlan_dev_vlan_id(info->upper_dev); + + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the + * device, respectively the VID is not found, returning + * 0 means success, which is a failure for us here. + */ + err = br_vlan_get_info(br, vid, &br_info); + if (err == 0) { + NL_SET_ERR_MSG_MOD(extack, + "This VLAN is already configured by the bridge"); + return notifier_from_errno(-EBUSY); + } + + return NOTIFY_DONE; +} + +static int +dsa_slave_prechangeupper_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_switch *ds; + struct dsa_port *dp; + int err; + + if (!dsa_slave_dev_check(dev)) + return dsa_prevent_bridging_8021q_upper(dev, info); + + dp = dsa_slave_to_port(dev); + ds = dp->ds; + + if (ds->ops->port_prechangeupper) { + err = ds->ops->port_prechangeupper(ds, dp->index, info); + if (err) + return notifier_from_errno(err); + } + + if (is_vlan_dev(info->upper_dev)) + return dsa_slave_check_8021q_upper(dev, info); + + return NOTIFY_DONE; +} + +/* To be eligible as a DSA master, a LAG must have all lower interfaces be + * eligible DSA masters. Additionally, all LAG slaves must be DSA masters of + * switches in the same switch tree. + */ +static int dsa_lag_master_validate(struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *lower1, *lower2; + struct list_head *iter1, *iter2; + + netdev_for_each_lower_dev(lag_dev, lower1, iter1) { + netdev_for_each_lower_dev(lag_dev, lower2, iter2) { + if (!netdev_uses_dsa(lower1) || + !netdev_uses_dsa(lower2)) { + NL_SET_ERR_MSG_MOD(extack, + "All LAG ports must be eligible as DSA masters"); + return notifier_from_errno(-EINVAL); + } + + if (lower1 == lower2) + continue; + + if (!dsa_port_tree_same(lower1->dsa_ptr, + lower2->dsa_ptr)) { + NL_SET_ERR_MSG_MOD(extack, + "LAG contains DSA masters of disjoint switch trees"); + return notifier_from_errno(-EINVAL); + } + } + } + + return NOTIFY_DONE; +} + +static int +dsa_master_prechangeupper_sanity_check(struct net_device *master, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); + + if (!netdev_uses_dsa(master)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + /* Allow DSA switch uppers */ + if (dsa_slave_dev_check(info->upper_dev)) + return NOTIFY_DONE; + + /* Allow bridge uppers of DSA masters, subject to further + * restrictions in dsa_bridge_prechangelower_sanity_check() + */ + if (netif_is_bridge_master(info->upper_dev)) + return NOTIFY_DONE; + + /* Allow LAG uppers, subject to further restrictions in + * dsa_lag_master_prechangelower_sanity_check() + */ + if (netif_is_lag_master(info->upper_dev)) + return dsa_lag_master_validate(info->upper_dev, extack); + + NL_SET_ERR_MSG_MOD(extack, + "DSA master cannot join unknown upper interfaces"); + return notifier_from_errno(-EBUSY); +} + +static int +dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); + struct net_device *lag_dev = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) { + NL_SET_ERR_MSG(extack, + "Only DSA masters can join a LAG DSA master"); + return notifier_from_errno(-EINVAL); + } + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { + NL_SET_ERR_MSG(extack, + "Interface is DSA master for a different switch tree than this LAG"); + return notifier_from_errno(-EINVAL); + } + + break; + } + + return NOTIFY_DONE; +} + +/* Don't allow bridging of DSA masters, since the bridge layer rx_handler + * prevents the DSA fake ethertype handler to be invoked, so we don't get the + * chance to strip off and parse the DSA switch tag protocol header (the bridge + * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these + * frames). + * The only case where that would not be an issue is when bridging can already + * be offloaded, such as when the DSA master is itself a DSA or plain switchdev + * port, and is bridged only with other ports from the same hardware device. + */ +static int +dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *br = info->upper_dev; + struct netlink_ext_ack *extack; + struct net_device *lower; + struct list_head *iter; + + if (!netif_is_bridge_master(br)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + extack = netdev_notifier_info_to_extack(&info->info); + + netdev_for_each_lower_dev(br, lower, iter) { + if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower)) + continue; + + if (!netdev_port_same_parent_id(lower, new_lower)) { + NL_SET_ERR_MSG(extack, + "Cannot do software bridging with a DSA master"); + return notifier_from_errno(-EINVAL); + } + } + + return NOTIFY_DONE; +} + +static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst, + struct net_device *lag_dev) +{ + struct net_device *new_master = dsa_tree_find_first_master(dst); + struct dsa_port *dp; + int err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp) != lag_dev) + continue; + + err = dsa_slave_change_master(dp->slave, new_master, NULL); + if (err) { + netdev_err(dp->slave, + "failed to restore master to %s: %pe\n", + new_master->name, ERR_PTR(err)); + } + } +} + +static int dsa_master_lag_join(struct net_device *master, + struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + int err; + + err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack); + if (err) + return err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp) != master) + continue; + + err = dsa_slave_change_master(dp->slave, lag_dev, extack); + if (err) + goto restore; + } + + return 0; + +restore: + dsa_tree_for_each_user_port_continue_reverse(dp, dst) { + if (dsa_port_to_master(dp) != lag_dev) + continue; + + err = dsa_slave_change_master(dp->slave, master, NULL); + if (err) { + netdev_err(dp->slave, + "failed to restore master to %s: %pe\n", + master->name, ERR_PTR(err)); + } + } + + dsa_master_lag_teardown(lag_dev, master->dsa_ptr); + + return err; +} + +static void dsa_master_lag_leave(struct net_device *master, + struct net_device *lag_dev) +{ + struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *new_cpu_dp = NULL; + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (netdev_uses_dsa(lower)) { + new_cpu_dp = lower->dsa_ptr; + break; + } + } + + if (new_cpu_dp) { + /* Update the CPU port of the user ports still under the LAG + * so that dsa_port_to_master() continues to work properly + */ + dsa_tree_for_each_user_port(dp, dst) + if (dsa_port_to_master(dp) == lag_dev) + dp->cpu_dp = new_cpu_dp; + + /* Update the index of the virtual CPU port to match the lowest + * physical CPU port + */ + lag_dev->dsa_ptr = new_cpu_dp; + wmb(); + } else { + /* If the LAG DSA master has no ports left, migrate back all + * user ports to the first physical CPU port + */ + dsa_tree_migrate_ports_from_lag_master(dst, lag_dev); + } + + /* This DSA master has left its LAG in any case, so let + * the CPU port leave the hardware LAG as well + */ + dsa_master_lag_teardown(lag_dev, master->dsa_ptr); +} + +static int dsa_master_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack; + int err = NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) + return err; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_master_lag_join(dev, info->upper_dev, + info->upper_info, extack); + err = notifier_from_errno(err); + } else { + dsa_master_lag_leave(dev, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + static int dsa_slave_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (event == NETDEV_CHANGEUPPER) { - if (!dsa_slave_dev_check(dev)) - return dsa_slave_upper_vlan_check(dev, ptr); + switch (event) { + case NETDEV_PRECHANGEUPPER: { + struct netdev_notifier_changeupper_info *info = ptr; + int err; + + err = dsa_slave_prechangeupper_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_master_prechangeupper_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_lag_master_prechangelower_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_bridge_prechangelower_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_slave_prechangeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + err = dsa_slave_lag_prechangeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + break; + } + case NETDEV_CHANGEUPPER: { + int err; + + err = dsa_slave_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + err = dsa_slave_lag_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + err = dsa_master_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + break; + } + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + struct dsa_port *dp; + int err = 0; + + if (dsa_slave_dev_check(dev)) { + dp = dsa_slave_to_port(dev); - return dsa_slave_changeupper(dev, ptr); + err = dsa_port_lag_change(dp, info->lower_state_info); + } + + /* Mirror LAG port events on DSA masters that are in + * a LAG towards their respective switch CPU ports + */ + if (netdev_uses_dsa(dev)) { + dp = dev->dsa_ptr; + + err = dsa_port_lag_change(dp, info->lower_state_info); + } + + return notifier_from_errno(err); + } + case NETDEV_CHANGE: + case NETDEV_UP: { + /* Track state of master port. + * DSA driver may require the master port (and indirectly + * the tagger) to be available for some special operation. + */ + if (netdev_uses_dsa(dev)) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->ds->dst; + + /* Track when the master port is UP */ + dsa_tree_master_oper_state_change(dst, dev, + netif_oper_up(dev)); + + /* Track when the master port is ready and can accept + * packet. + * NETDEV_UP event is not enough to flag a port as ready. + * We also have to wait for linkwatch_do_dev to dev_activate + * and emit a NETDEV_CHANGE event. + * We check if a master port is ready by checking if the dev + * have a qdisc assigned and is not noop. + */ + dsa_tree_master_admin_state_change(dst, dev, + !qdisc_tx_is_noop(dev)); + + return NOTIFY_OK; + } + + return NOTIFY_DONE; + } + case NETDEV_GOING_DOWN: { + struct dsa_port *dp, *cpu_dp; + struct dsa_switch_tree *dst; + LIST_HEAD(close_list); + + if (!netdev_uses_dsa(dev)) + return NOTIFY_DONE; + + cpu_dp = dev->dsa_ptr; + dst = cpu_dp->ds->dst; + + dsa_tree_master_admin_state_change(dst, dev, false); + + list_for_each_entry(dp, &dst->ports, list) { + if (!dsa_port_is_user(dp)) + continue; + + if (dp->cpu_dp != cpu_dp) + continue; + + list_add(&dp->slave->close_list, &close_list); + } + + dev_close_many(&close_list, true); + + return NOTIFY_OK; + } + default: + break; } return NOTIFY_DONE; } -struct dsa_switchdev_event_work { - struct work_struct work; - struct switchdev_notifier_fdb_info fdb_info; - struct net_device *dev; - unsigned long event; -}; +static void +dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) +{ + struct switchdev_notifier_fdb_info info = {}; + + info.addr = switchdev_work->addr; + info.vid = switchdev_work->vid; + info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, + switchdev_work->orig_dev, &info.info, NULL); +} static void dsa_slave_switchdev_event_work(struct work_struct *work) { struct dsa_switchdev_event_work *switchdev_work = container_of(work, struct dsa_switchdev_event_work, work); + const unsigned char *addr = switchdev_work->addr; struct net_device *dev = switchdev_work->dev; - struct switchdev_notifier_fdb_info *fdb_info; - struct dsa_port *dp = dsa_slave_to_port(dev); + u16 vid = switchdev_work->vid; + struct dsa_switch *ds; + struct dsa_port *dp; int err; - rtnl_lock(); + dp = dsa_slave_to_port(dev); + ds = dp->ds; + switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: - fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) - break; - - err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid); + if (switchdev_work->host_addr) + err = dsa_port_bridge_host_fdb_add(dp, addr, vid); + else if (dp->lag) + err = dsa_port_lag_fdb_add(dp, addr, vid); + else + err = dsa_port_fdb_add(dp, addr, vid); if (err) { - netdev_dbg(dev, "fdb add failed err=%d\n", err); + dev_err(ds->dev, + "port %d failed to add %pM vid %d to fdb: %d\n", + dp->index, addr, vid, err); break; } - fdb_info->offloaded = true; - call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, - &fdb_info->info, NULL); + dsa_fdb_offload_notify(switchdev_work); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: - fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) - break; - - err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid); + if (switchdev_work->host_addr) + err = dsa_port_bridge_host_fdb_del(dp, addr, vid); + else if (dp->lag) + err = dsa_port_lag_fdb_del(dp, addr, vid); + else + err = dsa_port_fdb_del(dp, addr, vid); if (err) { - netdev_dbg(dev, "fdb del failed err=%d\n", err); - dev_close(dev); + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from fdb: %d\n", + dp->index, addr, vid, err); } + break; } - rtnl_unlock(); - kfree(switchdev_work->fdb_info.addr); kfree(switchdev_work); - dev_put(dev); } -static int -dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work * - switchdev_work, - const struct switchdev_notifier_fdb_info * - fdb_info) -{ - memcpy(&switchdev_work->fdb_info, fdb_info, - sizeof(switchdev_work->fdb_info)); - switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); - if (!switchdev_work->fdb_info.addr) +static bool dsa_foreign_dev_check(const struct net_device *dev, + const struct net_device *foreign_dev) +{ + const struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch_tree *dst = dp->ds->dst; + + if (netif_is_bridge_master(foreign_dev)) + return !dsa_tree_offloads_bridge_dev(dst, foreign_dev); + + if (netif_is_bridge_port(foreign_dev)) + return !dsa_tree_offloads_bridge_port(dst, foreign_dev); + + /* Everything else is foreign */ + return true; +} + +static int dsa_slave_fdb_event(struct net_device *dev, + struct net_device *orig_dev, + unsigned long event, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + struct dsa_switchdev_event_work *switchdev_work; + struct dsa_port *dp = dsa_slave_to_port(dev); + bool host_addr = fdb_info->is_local; + struct dsa_switch *ds = dp->ds; + + if (ctx && ctx != dp) + return 0; + + if (!dp->bridge) + return 0; + + if (switchdev_fdb_is_dynamically_learned(fdb_info)) { + if (dsa_port_offloads_bridge_port(dp, orig_dev)) + return 0; + + /* FDB entries learned by the software bridge or by foreign + * bridge ports should be installed as host addresses only if + * the driver requests assisted learning. + */ + if (!ds->assisted_learning_on_cpu_port) + return 0; + } + + /* Also treat FDB entries on foreign interfaces bridged with us as host + * addresses. + */ + if (dsa_foreign_dev_check(dev, orig_dev)) + host_addr = true; + + /* Check early that we're not doing work in vain. + * Host addresses on LAG ports still require regular FDB ops, + * since the CPU port isn't in a LAG. + */ + if (dp->lag && !host_addr) { + if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del) + return -EOPNOTSUPP; + } else { + if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) + return -EOPNOTSUPP; + } + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) return -ENOMEM; - ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, - fdb_info->addr); + + netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n", + event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting", + orig_dev->name, fdb_info->addr, fdb_info->vid, + host_addr ? " as host address" : ""); + + INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); + switchdev_work->event = event; + switchdev_work->dev = dev; + switchdev_work->orig_dev = orig_dev; + + ether_addr_copy(switchdev_work->addr, fdb_info->addr); + switchdev_work->vid = fdb_info->vid; + switchdev_work->host_addr = host_addr; + + dsa_schedule_work(&switchdev_work->work); + return 0; } @@ -1611,46 +3380,26 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); - struct dsa_switchdev_event_work *switchdev_work; int err; - if (event == SWITCHDEV_PORT_ATTR_SET) { + switch (event) { + case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, dsa_slave_dev_check, dsa_slave_port_attr_set); return notifier_from_errno(err); - } - - if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; - - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return NOTIFY_BAD; - - INIT_WORK(&switchdev_work->work, - dsa_slave_switchdev_event_work); - switchdev_work->dev = dev; - switchdev_work->event = event; - - switch (event) { - case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: - if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr)) - goto err_fdb_work_init; - dev_hold(dev); - break; + err = switchdev_handle_fdb_event_to_device(dev, event, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_fdb_event); + return notifier_from_errno(err); default: - kfree(switchdev_work); return NOTIFY_DONE; } - dsa_schedule_work(&switchdev_work->work); return NOTIFY_OK; - -err_fdb_work_init: - kfree(switchdev_work); - return NOTIFY_BAD; } static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, @@ -1661,14 +3410,16 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, switch (event) { case SWITCHDEV_PORT_OBJ_ADD: - err = switchdev_handle_port_obj_add(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_obj_add); + err = switchdev_handle_port_obj_add_foreign(dev, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_port_obj_add); return notifier_from_errno(err); case SWITCHDEV_PORT_OBJ_DEL: - err = switchdev_handle_port_obj_del(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_obj_del); + err = switchdev_handle_port_obj_del_foreign(dev, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_port_obj_del); return notifier_from_errno(err); case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, @@ -1684,11 +3435,11 @@ static struct notifier_block dsa_slave_nb __read_mostly = { .notifier_call = dsa_slave_netdevice_event, }; -static struct notifier_block dsa_slave_switchdev_notifier = { +struct notifier_block dsa_slave_switchdev_notifier = { .notifier_call = dsa_slave_switchdev_event, }; -static struct notifier_block dsa_slave_switchdev_blocking_notifier = { +struct notifier_block dsa_slave_switchdev_blocking_notifier = { .notifier_call = dsa_slave_switchdev_blocking_event, }; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index df4abe897ed6..ce56acdba203 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -17,14 +17,11 @@ static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) { - int i; - - for (i = 0; i < ds->num_ports; ++i) { - struct dsa_port *dp = dsa_to_port(ds, i); + struct dsa_port *dp; + dsa_switch_for_each_port(dp, ds) if (dp->ageing_time && dp->ageing_time < ageing_time) ageing_time = dp->ageing_time; - } return ageing_time; } @@ -33,15 +30,12 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, struct dsa_notifier_ageing_time_info *info) { unsigned int ageing_time = info->ageing_time; - struct switchdev_trans *trans = info->trans; - if (switchdev_trans_ph_prepare(trans)) { - if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) - return -ERANGE; - if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) - return -ERANGE; - return 0; - } + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; /* Program the fastest ageing time in case of multiple bridges */ ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time); @@ -52,15 +46,60 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, return 0; } +static bool dsa_port_mtu_match(struct dsa_port *dp, + struct dsa_notifier_mtu_info *info) +{ + return dp == info->dp || dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp); +} + +static int dsa_switch_mtu(struct dsa_switch *ds, + struct dsa_notifier_mtu_info *info) +{ + struct dsa_port *dp; + int ret; + + if (!ds->ops->port_change_mtu) + return -EOPNOTSUPP; + + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_mtu_match(dp, info)) { + ret = ds->ops->port_change_mtu(ds, dp->index, + info->mtu); + if (ret) + return ret; + } + } + + return 0; +} + static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - if (ds->index == info->sw_index && ds->ops->port_bridge_join) - return ds->ops->port_bridge_join(ds, info->port, info->br); + int err; + + if (info->dp->ds == ds) { + if (!ds->ops->port_bridge_join) + return -EOPNOTSUPP; - if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join) - return ds->ops->crosschip_bridge_join(ds, info->sw_index, - info->port, info->br); + err = ds->ops->port_bridge_join(ds, info->dp->index, + info->bridge, + &info->tx_fwd_offload, + info->extack); + if (err) + return err; + } + + if (info->dp->ds != ds && ds->ops->crosschip_bridge_join) { + err = ds->ops->crosschip_bridge_join(ds, + info->dp->ds->dst->index, + info->dp->ds->index, + info->dp->index, + info->bridge, + info->extack); + if (err) + return err; + } return 0; } @@ -68,191 +107,662 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - bool unset_vlan_filtering = br_vlan_enabled(info->br); - int err, i; + if (info->dp->ds == ds && ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->dp->index, info->bridge); - if (ds->index == info->sw_index && ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, info->port, info->br); + if (info->dp->ds != ds && ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->dp->ds->dst->index, + info->dp->ds->index, + info->dp->index, + info->bridge); - if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave) - ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, - info->br); + return 0; +} - /* If the bridge was vlan_filtering, the bridge core doesn't trigger an - * event for changing vlan_filtering setting upon slave ports leaving - * it. That is a good thing, because that lets us handle it and also - * handle the case where the switch's vlan_filtering setting is global - * (not per port). When that happens, the correct moment to trigger the - * vlan_filtering callback is only when the last port left this bridge. - */ - if (unset_vlan_filtering && ds->vlan_filtering_is_global) { - for (i = 0; i < ds->num_ports; i++) { - if (i == info->port) - continue; - if (dsa_to_port(ds, i)->bridge_dev == info->br) { - unset_vlan_filtering = false; - break; +/* Matches for all upstream-facing ports (the CPU port and all upstream-facing + * DSA links) that sit between the targeted port on which the notifier was + * emitted and its dedicated CPU port. + */ +static bool dsa_port_host_address_match(struct dsa_port *dp, + const struct dsa_port *targeted_dp) +{ + struct dsa_port *cpu_dp = targeted_dp->cpu_dp; + + if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds)) + return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index, + cpu_dp->index); + + return false; +} + +static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_mac_addr *a; + + list_for_each_entry(a, addr_list, list) + if (ether_addr_equal(a->addr, addr) && a->vid == vid && + dsa_db_equal(&a->db, &db)) + return a; + + return NULL; +} + +static int dsa_port_do_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_mac_addr *a; + int port = dp->index; + int err = 0; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->port_mdb_add(ds, port, mdb, db); + + mutex_lock(&dp->addr_lists_lock); + + a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db); + if (a) { + refcount_inc(&a->refcount); + goto out; + } + + a = kzalloc(sizeof(*a), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out; + } + + err = ds->ops->port_mdb_add(ds, port, mdb, db); + if (err) { + kfree(a); + goto out; + } + + ether_addr_copy(a->addr, mdb->addr); + a->vid = mdb->vid; + a->db = db; + refcount_set(&a->refcount, 1); + list_add_tail(&a->list, &dp->mdbs); + +out: + mutex_unlock(&dp->addr_lists_lock); + + return err; +} + +static int dsa_port_do_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_mac_addr *a; + int port = dp->index; + int err = 0; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->port_mdb_del(ds, port, mdb, db); + + mutex_lock(&dp->addr_lists_lock); + + a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db); + if (!a) { + err = -ENOENT; + goto out; + } + + if (!refcount_dec_and_test(&a->refcount)) + goto out; + + err = ds->ops->port_mdb_del(ds, port, mdb, db); + if (err) { + refcount_set(&a->refcount, 1); + goto out; + } + + list_del(&a->list); + kfree(a); + +out: + mutex_unlock(&dp->addr_lists_lock); + + return err; +} + +static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid, struct dsa_db db) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_mac_addr *a; + int port = dp->index; + int err = 0; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->port_fdb_add(ds, port, addr, vid, db); + + mutex_lock(&dp->addr_lists_lock); + + a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db); + if (a) { + refcount_inc(&a->refcount); + goto out; + } + + a = kzalloc(sizeof(*a), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out; + } + + err = ds->ops->port_fdb_add(ds, port, addr, vid, db); + if (err) { + kfree(a); + goto out; + } + + ether_addr_copy(a->addr, addr); + a->vid = vid; + a->db = db; + refcount_set(&a->refcount, 1); + list_add_tail(&a->list, &dp->fdbs); + +out: + mutex_unlock(&dp->addr_lists_lock); + + return err; +} + +static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid, struct dsa_db db) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_mac_addr *a; + int port = dp->index; + int err = 0; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->port_fdb_del(ds, port, addr, vid, db); + + mutex_lock(&dp->addr_lists_lock); + + a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db); + if (!a) { + err = -ENOENT; + goto out; + } + + if (!refcount_dec_and_test(&a->refcount)) + goto out; + + err = ds->ops->port_fdb_del(ds, port, addr, vid, db); + if (err) { + refcount_set(&a->refcount, 1); + goto out; + } + + list_del(&a->list); + kfree(a); + +out: + mutex_unlock(&dp->addr_lists_lock); + + return err; +} + +static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_mac_addr *a; + int err = 0; + + mutex_lock(&lag->fdb_lock); + + a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db); + if (a) { + refcount_inc(&a->refcount); + goto out; + } + + a = kzalloc(sizeof(*a), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out; + } + + err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db); + if (err) { + kfree(a); + goto out; + } + + ether_addr_copy(a->addr, addr); + a->vid = vid; + a->db = db; + refcount_set(&a->refcount, 1); + list_add_tail(&a->list, &lag->fdbs); + +out: + mutex_unlock(&lag->fdb_lock); + + return err; +} + +static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct dsa_mac_addr *a; + int err = 0; + + mutex_lock(&lag->fdb_lock); + + a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db); + if (!a) { + err = -ENOENT; + goto out; + } + + if (!refcount_dec_and_test(&a->refcount)) + goto out; + + err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db); + if (err) { + refcount_set(&a->refcount, 1); + goto out; + } + + list_del(&a->list); + kfree(a); + +out: + mutex_unlock(&lag->fdb_lock); + + return err; +} + +static int dsa_switch_host_fdb_add(struct dsa_switch *ds, + struct dsa_notifier_fdb_info *info) +{ + struct dsa_port *dp; + int err = 0; + + if (!ds->ops->port_fdb_add) + return -EOPNOTSUPP; + + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->dp)) { + if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) { + err = dsa_switch_do_lag_fdb_add(ds, dp->lag, + info->addr, + info->vid, + info->db); + } else { + err = dsa_port_do_fdb_add(dp, info->addr, + info->vid, info->db); } + if (err) + break; } } - if (unset_vlan_filtering) { - struct switchdev_trans trans = {0}; - err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &trans); - if (err && err != EOPNOTSUPP) - return err; + return err; +} + +static int dsa_switch_host_fdb_del(struct dsa_switch *ds, + struct dsa_notifier_fdb_info *info) +{ + struct dsa_port *dp; + int err = 0; + + if (!ds->ops->port_fdb_del) + return -EOPNOTSUPP; + + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->dp)) { + if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) { + err = dsa_switch_do_lag_fdb_del(ds, dp->lag, + info->addr, + info->vid, + info->db); + } else { + err = dsa_port_do_fdb_del(dp, info->addr, + info->vid, info->db); + } + if (err) + break; + } } - return 0; + + return err; } static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - int port = dsa_towards_port(ds, info->sw_index, info->port); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->ops->port_fdb_add(ds, port, info->addr, info->vid); + return dsa_port_do_fdb_add(dp, info->addr, info->vid, info->db); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - int port = dsa_towards_port(ds, info->sw_index, info->port); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); + return dsa_port_do_fdb_del(dp, info->addr, info->vid, info->db); } -static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, - struct dsa_notifier_mdb_info *info) +static int dsa_switch_lag_fdb_add(struct dsa_switch *ds, + struct dsa_notifier_lag_fdb_info *info) { - if (ds->index == info->sw_index && port == info->port) - return true; + struct dsa_port *dp; - if (dsa_is_dsa_port(ds, port)) - return true; + if (!ds->ops->lag_fdb_add) + return -EOPNOTSUPP; - return false; + /* Notify switch only if it has a port in this LAG */ + dsa_switch_for_each_port(dp, ds) + if (dsa_port_offloads_lag(dp, info->lag)) + return dsa_switch_do_lag_fdb_add(ds, info->lag, + info->addr, info->vid, + info->db); + + return 0; } -static int dsa_switch_mdb_prepare(struct dsa_switch *ds, - struct dsa_notifier_mdb_info *info) +static int dsa_switch_lag_fdb_del(struct dsa_switch *ds, + struct dsa_notifier_lag_fdb_info *info) { - int port, err; + struct dsa_port *dp; - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + if (!ds->ops->lag_fdb_del) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_mdb_match(ds, port, info)) { - err = ds->ops->port_mdb_prepare(ds, port, info->mdb); - if (err) - return err; - } - } + /* Notify switch only if it has a port in this LAG */ + dsa_switch_for_each_port(dp, ds) + if (dsa_port_offloads_lag(dp, info->lag)) + return dsa_switch_do_lag_fdb_del(ds, info->lag, + info->addr, info->vid, + info->db); return 0; } +static int dsa_switch_lag_change(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (info->dp->ds == ds && ds->ops->port_lag_change) + return ds->ops->port_lag_change(ds, info->dp->index); + + if (info->dp->ds != ds && ds->ops->crosschip_lag_change) + return ds->ops->crosschip_lag_change(ds, info->dp->ds->index, + info->dp->index); + + return 0; +} + +static int dsa_switch_lag_join(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (info->dp->ds == ds && ds->ops->port_lag_join) + return ds->ops->port_lag_join(ds, info->dp->index, info->lag, + info->info, info->extack); + + if (info->dp->ds != ds && ds->ops->crosschip_lag_join) + return ds->ops->crosschip_lag_join(ds, info->dp->ds->index, + info->dp->index, info->lag, + info->info, info->extack); + + return -EOPNOTSUPP; +} + +static int dsa_switch_lag_leave(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (info->dp->ds == ds && ds->ops->port_lag_leave) + return ds->ops->port_lag_leave(ds, info->dp->index, info->lag); + + if (info->dp->ds != ds && ds->ops->crosschip_lag_leave) + return ds->ops->crosschip_lag_leave(ds, info->dp->ds->index, + info->dp->index, info->lag); + + return -EOPNOTSUPP; +} + static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - int port; - - if (switchdev_trans_ph_prepare(info->trans)) - return dsa_switch_mdb_prepare(ds, info); + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_add) - return 0; - - for (port = 0; port < ds->num_ports; port++) - if (dsa_switch_mdb_match(ds, port, info)) - ds->ops->port_mdb_add(ds, port, info->mdb); + return -EOPNOTSUPP; - return 0; + return dsa_port_do_mdb_add(dp, info->mdb, info->db); } static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { + int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index); + struct dsa_port *dp = dsa_to_port(ds, port); + if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - if (ds->index == info->sw_index) - return ds->ops->port_mdb_del(ds, info->port, info->mdb); + return dsa_port_do_mdb_del(dp, info->mdb, info->db); +} + +static int dsa_switch_host_mdb_add(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) +{ + struct dsa_port *dp; + int err = 0; - return 0; + if (!ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->dp)) { + err = dsa_port_do_mdb_add(dp, info->mdb, info->db); + if (err) + break; + } + } + + return err; } -static int dsa_port_vlan_device_check(struct net_device *vlan_dev, - int vlan_dev_vid, - void *arg) +static int dsa_switch_host_mdb_del(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) { - struct switchdev_obj_port_vlan *vlan = arg; - u16 vid; + struct dsa_port *dp; + int err = 0; + + if (!ds->ops->port_mdb_del) + return -EOPNOTSUPP; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - if (vid == vlan_dev_vid) - return -EBUSY; + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->dp)) { + err = dsa_port_do_mdb_del(dp, info->mdb, info->db); + if (err) + break; + } } - return 0; + return err; } -static int dsa_port_vlan_check(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +/* Port VLANs match on the targeted port and on all DSA ports */ +static bool dsa_port_vlan_match(struct dsa_port *dp, + struct dsa_notifier_vlan_info *info) { - const struct dsa_port *dp = dsa_to_port(ds, port); + return dsa_port_is_dsa(dp) || dp == info->dp; +} + +/* Host VLANs match on the targeted port's CPU port, and on all DSA ports + * (upstream and downstream) of that switch and its upstream switches. + */ +static bool dsa_port_host_vlan_match(struct dsa_port *dp, + const struct dsa_port *targeted_dp) +{ + struct dsa_port *cpu_dp = targeted_dp->cpu_dp; + + if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds)) + return dsa_port_is_dsa(dp) || dp == cpu_dp; + + return false; +} + +static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, + const struct switchdev_obj_port_vlan *vlan) +{ + struct dsa_vlan *v; + + list_for_each_entry(v, vlan_list, list) + if (v->vid == vlan->vid) + return v; + + return NULL; +} + +static int dsa_port_do_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + struct dsa_vlan *v; int err = 0; - /* Device is not bridged, let it proceed with the VLAN device - * creation. - */ - if (!dp->bridge_dev) - return err; + /* No need to bother with refcounting for user ports. */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->port_vlan_add(ds, port, vlan, extack); - /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare phase and - * already checks whether there is an overlapping bridge VLAN entry - * with the same VID, so here we only need to check that if we are - * adding a bridge VLAN entry there is not an overlapping VLAN device - * claiming that VID. + /* No need to propagate on shared ports the existing VLANs that were + * re-notified after just the flags have changed. This would cause a + * refcount bump which we need to avoid, since it unbalances the + * additions with the deletions. */ - return vlan_for_each(dp->slave, dsa_port_vlan_device_check, - (void *)vlan); + if (vlan->changed) + return 0; + + mutex_lock(&dp->vlans_lock); + + v = dsa_vlan_find(&dp->vlans, vlan); + if (v) { + refcount_inc(&v->refcount); + goto out; + } + + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) { + err = -ENOMEM; + goto out; + } + + err = ds->ops->port_vlan_add(ds, port, vlan, extack); + if (err) { + kfree(v); + goto out; + } + + v->vid = vlan->vid; + refcount_set(&v->refcount, 1); + list_add_tail(&v->list, &dp->vlans); + +out: + mutex_unlock(&dp->vlans_lock); + + return err; } -static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, - struct dsa_notifier_vlan_info *info) +static int dsa_port_do_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan) { - if (ds->index == info->sw_index && port == info->port) - return true; + struct dsa_switch *ds = dp->ds; + int port = dp->index; + struct dsa_vlan *v; + int err = 0; - if (dsa_is_dsa_port(ds, port)) - return true; + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->port_vlan_del(ds, port, vlan); - return false; + mutex_lock(&dp->vlans_lock); + + v = dsa_vlan_find(&dp->vlans, vlan); + if (!v) { + err = -ENOENT; + goto out; + } + + if (!refcount_dec_and_test(&v->refcount)) + goto out; + + err = ds->ops->port_vlan_del(ds, port, vlan); + if (err) { + refcount_set(&v->refcount, 1); + goto out; + } + + list_del(&v->list); + kfree(v); + +out: + mutex_unlock(&dp->vlans_lock); + + return err; } -static int dsa_switch_vlan_prepare(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) +static int dsa_switch_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + if (!ds->ops->port_vlan_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_vlan_match(ds, port, info)) { - err = dsa_port_vlan_check(ds, port, info->vlan); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_vlan_match(dp, info)) { + err = dsa_port_do_vlan_add(dp, info->vlan, + info->extack); if (err) return err; + } + } - err = ds->ops->port_vlan_prepare(ds, port, info->vlan); + return 0; +} + +static int dsa_switch_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) +{ + struct dsa_port *dp; + int err; + + if (!ds->ops->port_vlan_del) + return -EOPNOTSUPP; + + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_vlan_match(dp, info)) { + err = dsa_port_do_vlan_del(dp, info->vlan); if (err) return err; } @@ -261,39 +771,148 @@ static int dsa_switch_vlan_prepare(struct dsa_switch *ds, return 0; } -static int dsa_switch_vlan_add(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) +static int dsa_switch_host_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { - int port; - - if (switchdev_trans_ph_prepare(info->trans)) - return dsa_switch_vlan_prepare(ds, info); + struct dsa_port *dp; + int err; if (!ds->ops->port_vlan_add) - return 0; + return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) - if (dsa_switch_vlan_match(ds, port, info)) - ds->ops->port_vlan_add(ds, port, info->vlan); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_vlan_match(dp, info->dp)) { + err = dsa_port_do_vlan_add(dp, info->vlan, + info->extack); + if (err) + return err; + } + } return 0; } -static int dsa_switch_vlan_del(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) +static int dsa_switch_host_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { + struct dsa_port *dp; + int err; + if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; - if (ds->index == info->sw_index) - return ds->ops->port_vlan_del(ds, info->port, info->vlan); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_vlan_match(dp, info->dp)) { + err = dsa_port_do_vlan_del(dp, info->vlan); + if (err) + return err; + } + } + + return 0; +} + +static int dsa_switch_change_tag_proto(struct dsa_switch *ds, + struct dsa_notifier_tag_proto_info *info) +{ + const struct dsa_device_ops *tag_ops = info->tag_ops; + struct dsa_port *dp, *cpu_dp; + int err; + + if (!ds->ops->change_tag_protocol) + return -EOPNOTSUPP; + + ASSERT_RTNL(); + + err = ds->ops->change_tag_protocol(ds, tag_ops->proto); + if (err) + return err; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) + dsa_port_set_tag_protocol(cpu_dp, tag_ops); + + /* Now that changing the tag protocol can no longer fail, let's update + * the remaining bits which are "duplicated for faster access", and the + * bits that depend on the tagger, such as the MTU. + */ + dsa_switch_for_each_user_port(dp, ds) { + struct net_device *slave = dp->slave; + + dsa_slave_setup_tagger(slave); + + /* rtnl_mutex is held in dsa_tree_change_tag_proto */ + dsa_slave_change_mtu(slave, slave->mtu); + } + + return 0; +} + +/* We use the same cross-chip notifiers to inform both the tagger side, as well + * as the switch side, of connection and disconnection events. + * Since ds->tagger_data is owned by the tagger, it isn't a hard error if the + * switch side doesn't support connecting to this tagger, and therefore, the + * fact that we don't disconnect the tagger side doesn't constitute a memory + * leak: the tagger will still operate with persistent per-switch memory, just + * with the switch side unconnected to it. What does constitute a hard error is + * when the switch side supports connecting but fails. + */ +static int +dsa_switch_connect_tag_proto(struct dsa_switch *ds, + struct dsa_notifier_tag_proto_info *info) +{ + const struct dsa_device_ops *tag_ops = info->tag_ops; + int err; + + /* Notify the new tagger about the connection to this switch */ + if (tag_ops->connect) { + err = tag_ops->connect(ds); + if (err) + return err; + } + + if (!ds->ops->connect_tag_protocol) + return -EOPNOTSUPP; + + /* Notify the switch about the connection to the new tagger */ + err = ds->ops->connect_tag_protocol(ds, tag_ops->proto); + if (err) { + /* Revert the new tagger's connection to this tree */ + if (tag_ops->disconnect) + tag_ops->disconnect(ds); + return err; + } - /* Do not deprogram the DSA links as they may be used as conduit - * for other VLAN members in the fabric. + return 0; +} + +static int +dsa_switch_disconnect_tag_proto(struct dsa_switch *ds, + struct dsa_notifier_tag_proto_info *info) +{ + const struct dsa_device_ops *tag_ops = info->tag_ops; + + /* Notify the tagger about the disconnection from this switch */ + if (tag_ops->disconnect && ds->tagger_data) + tag_ops->disconnect(ds); + + /* No need to notify the switch, since it shouldn't have any + * resources to tear down */ return 0; } +static int +dsa_switch_master_state_change(struct dsa_switch *ds, + struct dsa_notifier_master_state_info *info) +{ + if (!ds->ops->master_state_change) + return 0; + + ds->ops->master_state_change(ds, info->master, info->operational); + + return 0; +} + static int dsa_switch_event(struct notifier_block *nb, unsigned long event, void *info) { @@ -316,27 +935,77 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_FDB_DEL: err = dsa_switch_fdb_del(ds, info); break; + case DSA_NOTIFIER_HOST_FDB_ADD: + err = dsa_switch_host_fdb_add(ds, info); + break; + case DSA_NOTIFIER_HOST_FDB_DEL: + err = dsa_switch_host_fdb_del(ds, info); + break; + case DSA_NOTIFIER_LAG_FDB_ADD: + err = dsa_switch_lag_fdb_add(ds, info); + break; + case DSA_NOTIFIER_LAG_FDB_DEL: + err = dsa_switch_lag_fdb_del(ds, info); + break; + case DSA_NOTIFIER_LAG_CHANGE: + err = dsa_switch_lag_change(ds, info); + break; + case DSA_NOTIFIER_LAG_JOIN: + err = dsa_switch_lag_join(ds, info); + break; + case DSA_NOTIFIER_LAG_LEAVE: + err = dsa_switch_lag_leave(ds, info); + break; case DSA_NOTIFIER_MDB_ADD: err = dsa_switch_mdb_add(ds, info); break; case DSA_NOTIFIER_MDB_DEL: err = dsa_switch_mdb_del(ds, info); break; + case DSA_NOTIFIER_HOST_MDB_ADD: + err = dsa_switch_host_mdb_add(ds, info); + break; + case DSA_NOTIFIER_HOST_MDB_DEL: + err = dsa_switch_host_mdb_del(ds, info); + break; case DSA_NOTIFIER_VLAN_ADD: err = dsa_switch_vlan_add(ds, info); break; case DSA_NOTIFIER_VLAN_DEL: err = dsa_switch_vlan_del(ds, info); break; + case DSA_NOTIFIER_HOST_VLAN_ADD: + err = dsa_switch_host_vlan_add(ds, info); + break; + case DSA_NOTIFIER_HOST_VLAN_DEL: + err = dsa_switch_host_vlan_del(ds, info); + break; + case DSA_NOTIFIER_MTU: + err = dsa_switch_mtu(ds, info); + break; + case DSA_NOTIFIER_TAG_PROTO: + err = dsa_switch_change_tag_proto(ds, info); + break; + case DSA_NOTIFIER_TAG_PROTO_CONNECT: + err = dsa_switch_connect_tag_proto(ds, info); + break; + case DSA_NOTIFIER_TAG_PROTO_DISCONNECT: + err = dsa_switch_disconnect_tag_proto(ds, info); + break; + case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD: + err = dsa_switch_tag_8021q_vlan_add(ds, info); + break; + case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL: + err = dsa_switch_tag_8021q_vlan_del(ds, info); + break; + case DSA_NOTIFIER_MASTER_STATE_CHANGE: + err = dsa_switch_master_state_change(ds, info); + break; default: err = -EOPNOTSUPP; break; } - /* Non-switchdev operations cannot be rolled back. If a DSA driver - * returns an error during the chained call, switch chips may be in an - * inconsistent state. - */ if (err) dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n", event, err); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 2fb6c26294b5..34e5ec5d3e23 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -2,12 +2,10 @@ /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> * * This module is not a complete tagger implementation. It only provides - * primitives for taggers that rely on 802.1Q VLAN tags to use. The - * dsa_8021q_netdev_ops is registered for API compliance and not used - * directly by callers. + * primitives for taggers that rely on 802.1Q VLAN tags to use. */ -#include <linux/if_bridge.h> #include <linux/if_vlan.h> +#include <linux/dsa/8021q.h> #include "dsa_priv.h" @@ -16,67 +14,70 @@ * * | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * +-----------+-----+-----------------+-----------+-----------------------+ - * | DIR | RSV | SWITCH_ID | RSV | PORT | + * | RSV | VBID| SWITCH_ID | VBID | PORT | * +-----------+-----+-----------------+-----------+-----------------------+ * - * DIR - VID[11:10]: - * Direction flags. - * * 1 (0b01) for RX VLAN, - * * 2 (0b10) for TX VLAN. - * These values make the special VIDs of 0, 1 and 4095 to be left - * unused by this coding scheme. - * - * RSV - VID[9]: - * To be used for further expansion of SWITCH_ID or for other purposes. - * Must be transmitted as zero and ignored on receive. + * RSV - VID[11:10]: + * Reserved. Must be set to 3 (0b11). * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and 7. * - * RSV - VID[5:4]: - * To be used for further expansion of PORT or for other purposes. - * Must be transmitted as zero and ignored on receive. + * VBID - { VID[9], VID[5:4] }: + * Virtual bridge ID. If between 1 and 7, packet targets the broadcast + * domain of a bridge. If transmitted as zero, packet targets a single + * port. * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and 15. */ -#define DSA_8021Q_DIR_SHIFT 10 -#define DSA_8021Q_DIR_MASK GENMASK(11, 10) -#define DSA_8021Q_DIR(x) (((x) << DSA_8021Q_DIR_SHIFT) & \ - DSA_8021Q_DIR_MASK) -#define DSA_8021Q_DIR_RX DSA_8021Q_DIR(1) -#define DSA_8021Q_DIR_TX DSA_8021Q_DIR(2) +#define DSA_8021Q_RSV_VAL 3 +#define DSA_8021Q_RSV_SHIFT 10 +#define DSA_8021Q_RSV_MASK GENMASK(11, 10) +#define DSA_8021Q_RSV ((DSA_8021Q_RSV_VAL << DSA_8021Q_RSV_SHIFT) & \ + DSA_8021Q_RSV_MASK) #define DSA_8021Q_SWITCH_ID_SHIFT 6 #define DSA_8021Q_SWITCH_ID_MASK GENMASK(8, 6) #define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \ DSA_8021Q_SWITCH_ID_MASK) +#define DSA_8021Q_VBID_HI_SHIFT 9 +#define DSA_8021Q_VBID_HI_MASK GENMASK(9, 9) +#define DSA_8021Q_VBID_LO_SHIFT 4 +#define DSA_8021Q_VBID_LO_MASK GENMASK(5, 4) +#define DSA_8021Q_VBID_HI(x) (((x) & GENMASK(2, 2)) >> 2) +#define DSA_8021Q_VBID_LO(x) ((x) & GENMASK(1, 0)) +#define DSA_8021Q_VBID(x) \ + (((DSA_8021Q_VBID_LO(x) << DSA_8021Q_VBID_LO_SHIFT) & \ + DSA_8021Q_VBID_LO_MASK) | \ + ((DSA_8021Q_VBID_HI(x) << DSA_8021Q_VBID_HI_SHIFT) & \ + DSA_8021Q_VBID_HI_MASK)) + #define DSA_8021Q_PORT_SHIFT 0 #define DSA_8021Q_PORT_MASK GENMASK(3, 0) #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ DSA_8021Q_PORT_MASK) -/* Returns the VID to be inserted into the frame from xmit for switch steering - * instructions on egress. Encodes switch ID and port ID. - */ -u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) +u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num) { - return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port); + /* The VBID value of 0 is reserved for precise TX, but it is also + * reserved/invalid for the bridge_num, so all is well. + */ + return DSA_8021Q_RSV | DSA_8021Q_VBID(bridge_num); } -EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_vid); /* Returns the VID that will be installed as pvid for this switch port, sent as * tagged egress towards the CPU port and decoded by the rcv function. */ -u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) +u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp) { - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port); + return DSA_8021Q_RSV | DSA_8021Q_SWITCH_ID(dp->ds->index) | + DSA_8021Q_PORT(dp->index); } -EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_standalone_vid); /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) @@ -92,201 +93,344 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); -static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) +/* Returns the decoded VBID from the RX VID. */ +static int dsa_tag_8021q_rx_vbid(u16 vid) { - struct bridge_vlan_info vinfo; - struct net_device *slave; - u16 pvid; - int err; + u16 vbid_hi = (vid & DSA_8021Q_VBID_HI_MASK) >> DSA_8021Q_VBID_HI_SHIFT; + u16 vbid_lo = (vid & DSA_8021Q_VBID_LO_MASK) >> DSA_8021Q_VBID_LO_SHIFT; - if (!dsa_is_user_port(ds, port)) - return 0; + return (vbid_hi << 2) | vbid_lo; +} + +bool vid_is_dsa_8021q(u16 vid) +{ + u16 rsv = (vid & DSA_8021Q_RSV_MASK) >> DSA_8021Q_RSV_SHIFT; + + return rsv == DSA_8021Q_RSV_VAL; +} +EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); + +static struct dsa_tag_8021q_vlan * +dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid) +{ + struct dsa_tag_8021q_vlan *v; + + list_for_each_entry(v, &ctx->vlans, list) + if (v->vid == vid && v->port == port) + return v; + + return NULL; +} + +static int dsa_port_do_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, + u16 flags) +{ + struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; + struct dsa_switch *ds = dp->ds; + struct dsa_tag_8021q_vlan *v; + int port = dp->index; + int err; - slave = dsa_to_port(ds, port)->slave; + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags); - err = br_vlan_get_pvid(slave, &pvid); - if (!pvid || err < 0) - /* There is no pvid on the bridge for this port, which is - * perfectly valid. Nothing to restore, bye-bye! - */ + v = dsa_tag_8021q_vlan_find(ctx, port, vid); + if (v) { + refcount_inc(&v->refcount); return 0; + } + + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) + return -ENOMEM; - err = br_vlan_get_info(slave, pvid, &vinfo); - if (err < 0) { - dev_err(ds->dev, "Couldn't determine PVID attributes\n"); + err = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags); + if (err) { + kfree(v); return err; } - return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags); + v->vid = vid; + v->port = port; + refcount_set(&v->refcount, 1); + list_add_tail(&v->list, &ctx->vlans); + + return 0; } -/* If @enabled is true, installs @vid with @flags into the switch port's HW - * filter. - * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the - * user explicitly configured this @vid through the bridge core, then the @vid - * is installed again, but this time with the flags from the bridge layer. - */ -static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, - u16 flags, bool enabled) +static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) { - struct dsa_port *dp = dsa_to_port(ds, port); - struct bridge_vlan_info vinfo; + struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; + struct dsa_switch *ds = dp->ds; + struct dsa_tag_8021q_vlan *v; + int port = dp->index; int err; - if (enabled) - return dsa_port_vid_add(dp, vid, flags); + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->tag_8021q_vlan_del(ds, port, vid); - err = dsa_port_vid_del(dp, vid); - if (err < 0) - return err; + v = dsa_tag_8021q_vlan_find(ctx, port, vid); + if (!v) + return -ENOENT; - /* Nothing to restore from the bridge for a non-user port. - * The CPU port VLANs are restored implicitly with the user ports, - * similar to how the bridge does in dsa_slave_vlan_add and - * dsa_slave_vlan_del. - */ - if (!dsa_is_user_port(ds, port)) + if (!refcount_dec_and_test(&v->refcount)) return 0; - err = br_vlan_get_info(dp->slave, vid, &vinfo); - /* Couldn't determine bridge attributes for this vid, - * it means the bridge had not configured it. + err = ds->ops->tag_8021q_vlan_del(ds, port, vid); + if (err) { + refcount_inc(&v->refcount); + return err; + } + + list_del(&v->list); + kfree(v); + + return 0; +} + +static bool +dsa_port_tag_8021q_vlan_match(struct dsa_port *dp, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + return dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp) || dp == info->dp; +} + +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + struct dsa_port *dp; + int err; + + /* Since we use dsa_broadcast(), there might be other switches in other + * trees which don't support tag_8021q, so don't return an error. + * Or they might even support tag_8021q but have not registered yet to + * use it (maybe they use another tagger currently). */ - if (err < 0) + if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx) return 0; - /* Restore the VID from the bridge */ - err = dsa_port_vid_add(dp, vid, vinfo.flags); - if (err < 0) - return err; + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_tag_8021q_vlan_match(dp, info)) { + u16 flags = 0; + + if (dsa_port_is_user(dp)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID; + + err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid, + flags); + if (err) + return err; + } + } - vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID; + return 0; +} + +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + struct dsa_port *dp; + int err; + + if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx) + return 0; + + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_tag_8021q_vlan_match(dp, info)) { + err = dsa_port_do_tag_8021q_vlan_del(dp, info->vid); + if (err) + return err; + } + } - return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags); + return 0; } -/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single - * front-panel switch port (here swp0). +/* There are 2 ways of offloading tag_8021q VLANs. * - * Port identification through VLAN (802.1Q) tags has different requirements - * for it to work effectively: - * - On RX (ingress from network): each front-panel port must have a pvid - * that uniquely identifies it, and the egress of this pvid must be tagged - * towards the CPU port, so that software can recover the source port based - * on the VID in the frame. But this would only work for standalone ports; - * if bridged, this VLAN setup would break autonomous forwarding and would - * force all switched traffic to pass through the CPU. So we must also make - * the other front-panel ports members of this VID we're adding, albeit - * we're not making it their PVID (they'll still have their own). - * By the way - just because we're installing the same VID in multiple - * switch ports doesn't mean that they'll start to talk to one another, even - * while not bridged: the final forwarding decision is still an AND between - * the L2 forwarding information (which is limiting forwarding in this case) - * and the VLAN-based restrictions (of which there are none in this case, - * since all ports are members). - * - On TX (ingress from CPU and towards network) we are faced with a problem. - * If we were to tag traffic (from within DSA) with the port's pvid, all - * would be well, assuming the switch ports were standalone. Frames would - * have no choice but to be directed towards the correct front-panel port. - * But because we also want the RX VLAN to not break bridging, then - * inevitably that means that we have to give them a choice (of what - * front-panel port to go out on), and therefore we cannot steer traffic - * based on the RX VID. So what we do is simply install one more VID on the - * front-panel and CPU ports, and profit off of the fact that steering will - * work just by virtue of the fact that there is only one other port that's - * a member of the VID we're tagging the traffic with - the desired one. + * One is to use a hardware TCAM to push the port's standalone VLAN into the + * frame when forwarding it to the CPU, as an egress modification rule on the + * CPU port. This is preferable because it has no side effects for the + * autonomous forwarding path, and accomplishes tag_8021q's primary goal of + * identifying the source port of each packet based on VLAN ID. * - * So at the end, each front-panel port will have one RX VID (also the PVID), - * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU - * port will have the RX and TX VIDs of all front-panel ports, and on top of - * that, is also tagged-input and tagged-output (VLAN trunk). + * The other is to commit the tag_8021q VLAN as a PVID to the VLAN table, and + * to configure the port as VLAN-unaware. This is less preferable because + * unique source port identification can only be done for standalone ports; + * under a VLAN-unaware bridge, all ports share the same tag_8021q VLAN as + * PVID, and under a VLAN-aware bridge, packets received by software will not + * have tag_8021q VLANs appended, just bridge VLANs. * - * CPU port CPU port - * +-------------+-----+-------------+ +-------------+-----+-------------+ - * | RX VID | | | | TX VID | | | - * | of swp0 | | | | of swp0 | | | - * | +-----+ | | +-----+ | - * | ^ T | | | Tagged | - * | | | | | ingress | - * | +-------+---+---+-------+ | | +-----------+ | - * | | | | | | | | Untagged | - * | | U v U v U v | | v egress | - * | +-----+ +-----+ +-----+ +-----+ | | +-----+ +-----+ +-----+ +-----+ | - * | | | | | | | | | | | | | | | | | | | | - * | |PVID | | | | | | | | | | | | | | | | | | - * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ - * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 + * For tag_8021q implementations of the second type, this method is used to + * replace the standalone tag_8021q VLAN of a port with the tag_8021q VLAN to + * be used for VLAN-unaware bridging. */ -int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) { - int upstream = dsa_upstream_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ds, port); - int i, err; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 standalone_vid, bridge_vid; + int err; - /* The CPU port is implicitly configured by - * configuring the front-panel ports + /* Delete the standalone VLAN of the port and replace it with a + * bridging VLAN */ - if (!dsa_is_user_port(ds, port)) - return 0; + standalone_vid = dsa_tag_8021q_standalone_vid(dp); + bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num); + + err = dsa_port_tag_8021q_vlan_add(dp, bridge_vid, true); + if (err) + return err; + + dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false); + + return 0; +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join); + +void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + u16 standalone_vid, bridge_vid; + int err; - /* Add this user port's RX VID to the membership list of all others - * (including itself). This is so that bridging will not be hindered. - * L2 forwarding rules still take precedence when there are no VLAN - * restrictions, so there are no concerns about leaking traffic. + /* Delete the bridging VLAN of the port and replace it with a + * standalone VLAN */ - for (i = 0; i < ds->num_ports; i++) { - u16 flags; + standalone_vid = dsa_tag_8021q_standalone_vid(dp); + bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num); - if (i == upstream) - continue; - else if (i == port) - /* The RX VID is pvid on this port */ - flags = BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_PVID; - else - /* The RX VID is a regular VLAN on all others */ - flags = BRIDGE_VLAN_INFO_UNTAGGED; - - err = dsa_8021q_vid_apply(ds, i, rx_vid, flags, enabled); - if (err) { - dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", - rx_vid, port, err); - return err; - } + err = dsa_port_tag_8021q_vlan_add(dp, standalone_vid, false); + if (err) { + dev_err(ds->dev, + "Failed to delete tag_8021q standalone VLAN %d from port %d: %pe\n", + standalone_vid, port, ERR_PTR(err)); } - /* CPU port needs to see this port's RX VID - * as tagged egress. + dsa_port_tag_8021q_vlan_del(dp, bridge_vid, true); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_leave); + +/* Set up a port's standalone tag_8021q VLAN */ +static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 vid = dsa_tag_8021q_standalone_vid(dp); + struct net_device *master; + int err; + + /* The CPU port is implicitly configured by + * configuring the front-panel ports */ - err = dsa_8021q_vid_apply(ds, upstream, rx_vid, 0, enabled); + if (!dsa_port_is_user(dp)) + return 0; + + master = dsa_port_to_master(dp); + + err = dsa_port_tag_8021q_vlan_add(dp, vid, false); if (err) { - dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", - rx_vid, port, err); + dev_err(ds->dev, + "Failed to apply standalone VID %d to port %d: %pe\n", + vid, port, ERR_PTR(err)); return err; } - /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_8021q_vid_apply(ds, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, - enabled); - if (err) { - dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", - tx_vid, port, err); - return err; + /* Add the VLAN to the master's RX filter. */ + vlan_vid_add(master, ctx->proto, vid); + + return err; +} + +static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 vid = dsa_tag_8021q_standalone_vid(dp); + struct net_device *master; + + /* The CPU port is implicitly configured by + * configuring the front-panel ports + */ + if (!dsa_port_is_user(dp)) + return; + + master = dsa_port_to_master(dp); + + dsa_port_tag_8021q_vlan_del(dp, vid, false); + + vlan_vid_del(master, ctx->proto, vid); +} + +static int dsa_tag_8021q_setup(struct dsa_switch *ds) +{ + int err, port; + + ASSERT_RTNL(); + + for (port = 0; port < ds->num_ports; port++) { + err = dsa_tag_8021q_port_setup(ds, port); + if (err < 0) { + dev_err(ds->dev, + "Failed to setup VLAN tagging for port %d: %pe\n", + port, ERR_PTR(err)); + return err; + } } - err = dsa_8021q_vid_apply(ds, upstream, tx_vid, 0, enabled); - if (err) { - dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", - tx_vid, upstream, err); - return err; + + return 0; +} + +static void dsa_tag_8021q_teardown(struct dsa_switch *ds) +{ + int port; + + ASSERT_RTNL(); + + for (port = 0; port < ds->num_ports; port++) + dsa_tag_8021q_port_teardown(ds, port); +} + +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto) +{ + struct dsa_8021q_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->proto = proto; + ctx->ds = ds; + + INIT_LIST_HEAD(&ctx->vlans); + + ds->tag_8021q_ctx = ctx; + + return dsa_tag_8021q_setup(ds); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_register); + +void dsa_tag_8021q_unregister(struct dsa_switch *ds) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_tag_8021q_vlan *v, *n; + + dsa_tag_8021q_teardown(ds); + + list_for_each_entry_safe(v, n, &ctx->vlans, list) { + list_del(&v->list); + kfree(v); } - if (!enabled) - err = dsa_8021q_restore_pvid(ds, port); + ds->tag_8021q_ctx = NULL; - return err; + kfree(ctx); } -EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci) @@ -298,47 +442,57 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -/* In the DSA packet_type handler, skb->data points in the middle of the VLAN - * tag, after tpid and before tci. This is because so far, ETH_HLEN - * (DMAC, SMAC, EtherType) bytes were pulled. - * There are 2 bytes of VLAN tag left in skb->data, and upper - * layers expect the 'real' EtherType to be consumed as well. - * Coincidentally, a VLAN header is also of the same size as - * the number of bytes that need to be pulled. - * - * skb_mac_header skb->data - * | | - * v v - * | | | | | | | | | | | | | | | | | | | - * +-----------------------+-----------------------+-------+-------+-------+ - * | Destination MAC | Source MAC | TPID | TCI | EType | - * +-----------------------+-----------------------+-------+-------+-------+ - * ^ | | - * |<--VLAN_HLEN-->to <---VLAN_HLEN---> - * from | - * >>>>>>> v - * >>>>>>> | | | | | | | | | | | | | | | - * >>>>>>> +-----------------------+-----------------------+-------+ - * >>>>>>> | Destination MAC | Source MAC | EType | - * +-----------------------+-----------------------+-------+ - * ^ ^ - * (now part of | | - * skb->head) skb_mac_header skb->data - */ -struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb) +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, + int vbid) { - u8 *from = skb_mac_header(skb); - u8 *dest = from + VLAN_HLEN; + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + + if (WARN_ON(!vbid)) + return NULL; + + dsa_tree_for_each_user_port(dp, dst) { + if (!dp->bridge) + continue; + + if (dp->stp_state != BR_STATE_LEARNING && + dp->stp_state != BR_STATE_FORWARDING) + continue; + + if (dp->cpu_dp != cpu_dp) + continue; - memmove(dest, from, ETH_HLEN - VLAN_HLEN); - skb_pull(skb, VLAN_HLEN); - skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb_reset_mac_len(skb); - skb_pull_rcsum(skb, ETH_HLEN); + if (dsa_port_bridge_num_get(dp) == vbid) + return dp->slave; + } - return skb; + return NULL; } -EXPORT_SYMBOL_GPL(dsa_8021q_remove_header); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid); + +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, + int *vbid) +{ + u16 vid, tci; + + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + __vlan_hwaccel_clear_tag(skb); + } else { + skb_push_rcsum(skb, ETH_HLEN); + __skb_vlan_pop(skb, &tci); + skb_pull_rcsum(skb, ETH_HLEN); + } -MODULE_LICENSE("GPL v2"); + vid = tci & VLAN_VID_MASK; + + *source_port = dsa_8021q_rx_source_port(vid); + *switch_id = dsa_8021q_rx_switch_id(vid); + + if (vbid) + *vbid = dsa_tag_8021q_rx_vbid(vid); + + skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +} +EXPORT_SYMBOL_GPL(dsa_8021q_rcv); diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 55b00694cdba..8a02ac44282f 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -31,9 +31,6 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, __le16 *phdr; u16 hdr; - if (skb_cow_head(skb, AR9331_HDR_LEN) < 0) - return NULL; - phdr = skb_push(skb, AR9331_HDR_LEN); hdr = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION); @@ -47,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, } static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, - struct net_device *ndev, - struct packet_type *pt) + struct net_device *ndev) { u8 ver, port; u16 hdr; @@ -88,7 +84,7 @@ static const struct dsa_device_ops ar9331_netdev_ops = { .proto = DSA_TAG_PROTO_AR9331, .xmit = ar9331_tag_xmit, .rcv = ar9331_tag_rcv, - .overhead = AR9331_HDR_LEN, + .needed_headroom = AR9331_HDR_LEN, }; MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 9c3114179690..16889ea3e0a7 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -5,18 +5,36 @@ * Copyright (C) 2014 Broadcom Corporation */ +#include <linux/dsa/brcm.h> #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> #include "dsa_priv.h" -/* This tag length is 4 bytes, older ones were 6 bytes, we do not - * handle them - */ +/* Legacy Broadcom tag (6 bytes) */ +#define BRCM_LEG_TAG_LEN 6 + +/* Type fields */ +/* 1st byte in the tag */ +#define BRCM_LEG_TYPE_HI 0x88 +/* 2nd byte in the tag */ +#define BRCM_LEG_TYPE_LO 0x74 + +/* Tag fields */ +/* 3rd byte in the tag */ +#define BRCM_LEG_UNICAST (0 << 5) +#define BRCM_LEG_MULTICAST (1 << 5) +#define BRCM_LEG_EGRESS (2 << 5) +#define BRCM_LEG_INGRESS (3 << 5) + +/* 6th byte in the tag */ +#define BRCM_LEG_PORT_ID (0xf) + +/* Newer Broadcom tag (4 bytes) */ #define BRCM_TAG_LEN 4 -/* Tag is constructed and desconstructed using byte by byte access +/* Tag is constructed and deconstructed using byte by byte access * because the tag is placed after the MAC Source Address, which does * not make it 4-bytes aligned, so this might cause unaligned accesses * on most systems where this is used. @@ -66,9 +84,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; - if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) - return NULL; - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we @@ -84,11 +99,11 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, skb_push(skb, BRCM_TAG_LEN); if (offset) - memmove(skb->data, skb->data + BRCM_TAG_LEN, offset); + dsa_alloc_etype_header(skb, BRCM_TAG_LEN); brcm_tag = skb->data + offset; - /* Set the ingress opcode, traffic class, tag enforcment is + /* Set the ingress opcode, traffic class, tag enforcement is * deprecated */ brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) | @@ -107,9 +122,20 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, return skb; } +/* Frames with this tag have one of these two layouts: + * ----------------------------------- + * | MAC DA | MAC SA | 4b tag | Type | DSA_TAG_PROTO_BRCM + * ----------------------------------- + * ----------------------------------- + * | 4b tag | MAC DA | MAC SA | Type | DSA_TAG_PROTO_BRCM_PREPEND + * ----------------------------------- + * In both cases, at receive time, skb->data points 2 bytes before the actual + * Ethernet type field and we have an offset of 4bytes between where skb->data + * and where the payload starts. So the same low-level receive function can be + * used. + */ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, unsigned int offset) { int source_port; @@ -140,6 +166,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); + dsa_default_offload_fwd_mark(skb); + return skb; } #endif @@ -153,20 +181,16 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, } -static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *nskb; /* skb->data points to the EtherType, the tag is right before it */ - nskb = brcm_tag_rcv_ll(skb, dev, pt, 2); + nskb = brcm_tag_rcv_ll(skb, dev, 2); if (!nskb) return nskb; - /* Move the Ethernet DA and SA */ - memmove(nskb->data - ETH_HLEN, - nskb->data - ETH_HLEN - BRCM_TAG_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, BRCM_TAG_LEN); return nskb; } @@ -176,13 +200,90 @@ static const struct dsa_device_ops brcm_netdev_ops = { .proto = DSA_TAG_PROTO_BRCM, .xmit = brcm_tag_xmit, .rcv = brcm_tag_rcv, - .overhead = BRCM_TAG_LEN, + .needed_headroom = BRCM_TAG_LEN, }; DSA_TAG_DRIVER(brcm_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM); #endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) +static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *brcm_tag; + + /* The Ethernet switch we are interfaced with needs packets to be at + * least 64 bytes (including FCS) otherwise they will be discarded when + * they enter the switch port logic. When Broadcom tags are enabled, we + * need to make sure that packets are at least 70 bytes + * (including FCS and tag) because the length verification is done after + * the Broadcom tag is stripped off the ingress packet. + * + * Let dsa_slave_xmit() free the SKB + */ + if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false)) + return NULL; + + skb_push(skb, BRCM_LEG_TAG_LEN); + + dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN); + + brcm_tag = skb->data + 2 * ETH_ALEN; + + /* Broadcom tag type */ + brcm_tag[0] = BRCM_LEG_TYPE_HI; + brcm_tag[1] = BRCM_LEG_TYPE_LO; + + /* Broadcom tag value */ + brcm_tag[2] = BRCM_LEG_EGRESS; + brcm_tag[3] = 0; + brcm_tag[4] = 0; + brcm_tag[5] = dp->index & BRCM_LEG_PORT_ID; + + return skb; +} + +static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + int source_port; + u8 *brcm_tag; + + if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID))) + return NULL; + + brcm_tag = dsa_etype_header_pos_rx(skb); + + source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; + + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) + return NULL; + + /* Remove Broadcom tag and update checksum */ + skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); + + dsa_default_offload_fwd_mark(skb); + + dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN); + + return skb; +} + +static const struct dsa_device_ops brcm_legacy_netdev_ops = { + .name = "brcm-legacy", + .proto = DSA_TAG_PROTO_BRCM_LEGACY, + .xmit = brcm_leg_tag_xmit, + .rcv = brcm_leg_tag_rcv, + .needed_headroom = BRCM_LEG_TAG_LEN, +}; + +DSA_TAG_DRIVER(brcm_legacy_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY); +#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */ + #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, struct net_device *dev) @@ -192,11 +293,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, } static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* tag is prepended to the packet */ - return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); + return brcm_tag_rcv_ll(skb, dev, ETH_HLEN); } static const struct dsa_device_ops brcm_prepend_netdev_ops = { @@ -204,7 +304,7 @@ static const struct dsa_device_ops brcm_prepend_netdev_ops = { .proto = DSA_TAG_PROTO_BRCM_PREPEND, .xmit = brcm_tag_xmit_prepend, .rcv = brcm_tag_rcv_prepend, - .overhead = BRCM_TAG_LEN, + .needed_headroom = BRCM_TAG_LEN, }; DSA_TAG_DRIVER(brcm_prepend_netdev_ops); @@ -215,6 +315,9 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = { #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) &DSA_TAG_DRIVER_NAME(brcm_netdev_ops), #endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) + &DSA_TAG_DRIVER_NAME(brcm_legacy_netdev_ops), +#endif #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) &DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops), #endif diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 7ddec9794477..e4b6e3f2a3db 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -1,9 +1,51 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging + * Regular and Ethertype DSA tagging * Copyright (c) 2008-2009 Marvell Semiconductor + * + * Regular DSA + * ----------- + + * For untagged (in 802.1Q terms) packets, the switch will splice in + * the tag between the SA and the ethertype of the original + * packet. Tagged frames will instead have their outermost .1Q tag + * converted to a DSA tag. It expects the same layout when receiving + * packets from the CPU. + * + * Example: + * + * .----.----.----.--------- + * Pu: | DA | SA | ET | Payload ... + * '----'----'----'--------- + * 6 6 2 N + * .----.----.--------.-----.----.--------- + * Pt: | DA | SA | 0x8100 | TCI | ET | Payload ... + * '----'----'--------'-----'----'--------- + * 6 6 2 2 2 N + * .----.----.-----.----.--------- + * Pd: | DA | SA | DSA | ET | Payload ... + * '----'----'-----'----'--------- + * 6 6 4 2 N + * + * No matter if a packet is received untagged (Pu) or tagged (Pt), + * they will both have the same layout (Pd) when they are sent to the + * CPU. This is done by ignoring 802.3, replacing the ethertype field + * with more metadata, among which is a bit to signal if the original + * packet was tagged or not. + * + * Ethertype DSA + * ------------- + * Uses the exact same tag format as regular DSA, but also includes a + * proper ethertype field (which the mv88e6xxx driver sets to + * ETH_P_EDSA/0xdada) followed by two zero bytes: + * + * .----.----.--------.--------.-----.----.--------- + * | DA | SA | 0xdada | 0x0000 | DSA | ET | Payload ... + * '----'----'--------'--------'-----'----'--------- + * 6 6 2 2 4 2 N */ +#include <linux/dsa/mv88e6xxx.h> #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> @@ -12,95 +54,241 @@ #define DSA_HLEN 4 -static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) +/** + * enum dsa_cmd - DSA Command + * @DSA_CMD_TO_CPU: Set on packets that were trapped or mirrored to + * the CPU port. This is needed to implement control protocols, + * e.g. STP and LLDP, that must not allow those control packets to + * be switched according to the normal rules. + * @DSA_CMD_FROM_CPU: Used by the CPU to send a packet to a specific + * port, ignoring all the barriers that the switch normally + * enforces (VLANs, STP port states etc.). No source address + * learning takes place. "sudo send packet" + * @DSA_CMD_TO_SNIFFER: Set on the copies of packets that matched some + * user configured ingress or egress monitor criteria. These are + * forwarded by the switch tree to the user configured ingress or + * egress monitor port, which can be set to the CPU port or a + * regular port. If the destination is a regular port, the tag + * will be removed before egressing the port. If the destination + * is the CPU port, the tag will not be removed. + * @DSA_CMD_FORWARD: This tag is used on all bulk traffic passing + * through the switch tree, including the flows that are directed + * towards the CPU. Its device/port tuple encodes the original + * source port on which the packet ingressed. It can also be used + * on transmit by the CPU to defer the forwarding decision to the + * hardware, based on the current config of PVT/VTU/ATU + * etc. Source address learning takes places if enabled on the + * receiving DSA/CPU port. + */ +enum dsa_cmd { + DSA_CMD_TO_CPU = 0, + DSA_CMD_FROM_CPU = 1, + DSA_CMD_TO_SNIFFER = 2, + DSA_CMD_FORWARD = 3 +}; + +/** + * enum dsa_code - TO_CPU Code + * + * @DSA_CODE_MGMT_TRAP: DA was classified as a management + * address. Typical examples include STP BPDUs and LLDP. + * @DSA_CODE_FRAME2REG: Response to a "remote management" request. + * @DSA_CODE_IGMP_MLD_TRAP: IGMP/MLD signaling. + * @DSA_CODE_POLICY_TRAP: Frame matched some policy configuration on + * the device. Typical examples are matching on DA/SA/VID and DHCP + * snooping. + * @DSA_CODE_ARP_MIRROR: The name says it all really. + * @DSA_CODE_POLICY_MIRROR: Same as @DSA_CODE_POLICY_TRAP, but the + * particular policy was set to trigger a mirror instead of a + * trap. + * @DSA_CODE_RESERVED_6: Unused on all devices up to at least 6393X. + * @DSA_CODE_RESERVED_7: Unused on all devices up to at least 6393X. + * + * A 3-bit code is used to relay why a particular frame was sent to + * the CPU. We only use this to determine if the packet was mirrored + * or trapped, i.e. whether the packet has been forwarded by hardware + * or not. + * + * This is the superset of all possible codes. Any particular device + * may only implement a subset. + */ +enum dsa_code { + DSA_CODE_MGMT_TRAP = 0, + DSA_CODE_FRAME2REG = 1, + DSA_CODE_IGMP_MLD_TRAP = 2, + DSA_CODE_POLICY_TRAP = 3, + DSA_CODE_ARP_MIRROR = 4, + DSA_CODE_POLICY_MIRROR = 5, + DSA_CODE_RESERVED_6 = 6, + DSA_CODE_RESERVED_7 = 7 +}; + +static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, + u8 extra) { struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *br_dev; + u8 tag_dev, tag_port; + enum dsa_cmd cmd; u8 *dsa_header; - /* - * Convert the outermost 802.1q tag to a DSA tag for tagged - * packets, or insert a DSA tag between the addresses and - * the ethertype field for untagged packets. - */ - if (skb->protocol == htons(ETH_P_8021Q)) { - if (skb_cow_head(skb, 0) < 0) - return NULL; + if (skb->offload_fwd_mark) { + unsigned int bridge_num = dsa_port_bridge_num_get(dp); + struct dsa_switch_tree *dst = dp->ds->dst; - /* - * Construct tagged FROM_CPU DSA tag from 802.1q tag. - */ - dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x60 | dp->ds->index; - dsa_header[1] = dp->index << 3; + cmd = DSA_CMD_FORWARD; - /* - * Move CFI field from byte 2 to byte 1. + /* When offloading forwarding for a bridge, inject FORWARD + * packets on behalf of a virtual switch device with an index + * past the physical switches. */ + tag_dev = dst->last_switch + bridge_num; + tag_port = 0; + } else { + cmd = DSA_CMD_FROM_CPU; + tag_dev = dp->ds->index; + tag_port = dp->index; + } + + br_dev = dsa_port_bridge_dev_get(dp); + + /* If frame is already 802.1Q tagged, we can convert it to a DSA + * tag (avoiding a memmove), but only if the port is standalone + * (in which case we always send FROM_CPU) or if the port's + * bridge has VLAN filtering enabled (in which case the CPU port + * will be a member of the VLAN). + */ + if (skb->protocol == htons(ETH_P_8021Q) && + (!br_dev || br_vlan_enabled(br_dev))) { + if (extra) { + skb_push(skb, extra); + dsa_alloc_etype_header(skb, extra); + } + + /* Construct tagged DSA tag from 802.1Q tag. */ + dsa_header = dsa_etype_header_pos_tx(skb) + extra; + dsa_header[0] = (cmd << 6) | 0x20 | tag_dev; + dsa_header[1] = tag_port << 3; + + /* Move CFI field from byte 2 to byte 1. */ if (dsa_header[2] & 0x10) { dsa_header[1] |= 0x01; dsa_header[2] &= ~0x10; } } else { - if (skb_cow_head(skb, DSA_HLEN) < 0) - return NULL; - skb_push(skb, DSA_HLEN); + u16 vid; - memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + vid = br_dev ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE; - /* - * Construct untagged FROM_CPU DSA tag. - */ - dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x40 | dp->ds->index; - dsa_header[1] = dp->index << 3; - dsa_header[2] = 0x00; - dsa_header[3] = 0x00; + skb_push(skb, DSA_HLEN + extra); + dsa_alloc_etype_header(skb, DSA_HLEN + extra); + + /* Construct DSA header from untagged frame. */ + dsa_header = dsa_etype_header_pos_tx(skb) + extra; + + dsa_header[0] = (cmd << 6) | tag_dev; + dsa_header[1] = tag_port << 3; + dsa_header[2] = vid >> 8; + dsa_header[3] = vid & 0xff; } return skb; } -static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, + u8 extra) { + bool trap = false, trunk = false; + int source_device, source_port; + enum dsa_code code; + enum dsa_cmd cmd; u8 *dsa_header; - int source_device; - int source_port; - if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) - return NULL; + /* The ethertype field is part of the DSA header. */ + dsa_header = dsa_etype_header_pos_rx(skb); - /* - * The ethertype field is part of the DSA header. - */ - dsa_header = skb->data - 2; + cmd = dsa_header[0] >> 6; + switch (cmd) { + case DSA_CMD_FORWARD: + trunk = !!(dsa_header[1] & 4); + break; - /* - * Check that frame type is either TO_CPU or FORWARD. - */ - if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0) + case DSA_CMD_TO_CPU: + code = (dsa_header[1] & 0x6) | ((dsa_header[2] >> 4) & 1); + + switch (code) { + case DSA_CODE_FRAME2REG: + /* Remote management is not implemented yet, + * drop. + */ + return NULL; + case DSA_CODE_ARP_MIRROR: + case DSA_CODE_POLICY_MIRROR: + /* Mark mirrored packets to notify any upper + * device (like a bridge) that forwarding has + * already been done by hardware. + */ + break; + case DSA_CODE_MGMT_TRAP: + case DSA_CODE_IGMP_MLD_TRAP: + case DSA_CODE_POLICY_TRAP: + /* Traps have, by definition, not been + * forwarded by hardware, so don't mark them. + */ + trap = true; + break; + default: + /* Reserved code, this could be anything. Drop + * seems like the safest option. + */ + return NULL; + } + + break; + + default: return NULL; + } - /* - * Determine source device and port. - */ source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - skb->dev = dsa_master_find_slave(dev, source_device, source_port); + if (trunk) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_lag *lag; + + /* The exact source port is not available in the tag, + * so we inject the frame directly on the upper + * team/bond. + */ + lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1); + skb->dev = lag ? lag->dev : NULL; + } else { + skb->dev = dsa_master_find_slave(dev, source_device, + source_port); + } + if (!skb->dev) return NULL; - /* - * Convert the DSA header to an 802.1q header if the 'tagged' - * bit in the DSA header is set. If the 'tagged' bit is clear, - * delete the DSA header entirely. + /* When using LAG offload, skb->dev is not a DSA slave interface, + * so we cannot call dsa_default_offload_fwd_mark and we need to + * special-case it. + */ + if (trunk) + skb->offload_fwd_mark = true; + else if (!trap) + dsa_default_offload_fwd_mark(skb); + + /* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q + * tag, and delete the ethertype (extra) if applicable. If the + * 'tagged' bit is cleared; delete the DSA tag, and ethertype + * if applicable. */ if (dsa_header[0] & 0x20) { u8 new_header[4]; - /* - * Insert 802.1q ethertype and copy the VLAN-related + /* Insert 802.1Q ethertype and copy the VLAN-related * fields, but clear the bit that will hold CFI (since * DSA uses that bit location for another purpose). */ @@ -109,16 +297,13 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, new_header[2] = dsa_header[2] & ~0x10; new_header[3] = dsa_header[3]; - /* - * Move CFI bit from its place in the DSA header to - * its 802.1q-designated place. + /* Move CFI bit from its place in the DSA header to + * its 802.1Q-designated place. */ if (dsa_header[1] & 0x01) new_header[2] |= 0x10; - /* - * Update packet checksum if skb is CHECKSUM_COMPLETE. - */ + /* Update packet checksum if skb is CHECKSUM_COMPLETE. */ if (skb->ip_summed == CHECKSUM_COMPLETE) { __wsum c = skb->csum; c = csum_add(c, csum_partial(new_header + 2, 2, 0)); @@ -127,39 +312,95 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, } memcpy(dsa_header, new_header, DSA_HLEN); + + if (extra) + dsa_strip_etype_header(skb, extra); } else { - /* - * Remove DSA tag and update checksum. - */ skb_pull_rcsum(skb, DSA_HLEN); - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - DSA_HLEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, DSA_HLEN + extra); } - skb->offload_fwd_mark = 1; - return skb; } -static int dsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, - int *offset) +#if IS_ENABLED(CONFIG_NET_DSA_TAG_DSA) + +static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) { - *offset = 4; - *proto = ((__be16 *)skb->data)[1]; - return 0; + return dsa_xmit_ll(skb, dev, 0); +} + +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev) +{ + if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) + return NULL; + + return dsa_rcv_ll(skb, dev, 0); } static const struct dsa_device_ops dsa_netdev_ops = { - .name = "dsa", - .proto = DSA_TAG_PROTO_DSA, - .xmit = dsa_xmit, - .rcv = dsa_rcv, - .flow_dissect = dsa_tag_flow_dissect, - .overhead = DSA_HLEN, + .name = "dsa", + .proto = DSA_TAG_PROTO_DSA, + .xmit = dsa_xmit, + .rcv = dsa_rcv, + .needed_headroom = DSA_HLEN, }; -MODULE_LICENSE("GPL"); +DSA_TAG_DRIVER(dsa_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_DSA); +#endif /* CONFIG_NET_DSA_TAG_DSA */ + +#if IS_ENABLED(CONFIG_NET_DSA_TAG_EDSA) + +#define EDSA_HLEN 8 + +static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + u8 *edsa_header; + + skb = dsa_xmit_ll(skb, dev, EDSA_HLEN - DSA_HLEN); + if (!skb) + return NULL; + + edsa_header = dsa_etype_header_pos_tx(skb); + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + return skb; +} + +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev) +{ + if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) + return NULL; + + skb_pull_rcsum(skb, EDSA_HLEN - DSA_HLEN); -module_dsa_tag_driver(dsa_netdev_ops); + return dsa_rcv_ll(skb, dev, EDSA_HLEN - DSA_HLEN); +} + +static const struct dsa_device_ops edsa_netdev_ops = { + .name = "edsa", + .proto = DSA_TAG_PROTO_EDSA, + .xmit = edsa_xmit, + .rcv = edsa_rcv, + .needed_headroom = EDSA_HLEN, +}; + +DSA_TAG_DRIVER(edsa_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_EDSA); +#endif /* CONFIG_NET_DSA_TAG_EDSA */ + +static struct dsa_tag_driver *dsa_tag_drivers[] = { +#if IS_ENABLED(CONFIG_NET_DSA_TAG_DSA) + &DSA_TAG_DRIVER_NAME(dsa_netdev_ops), +#endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_EDSA) + &DSA_TAG_DRIVER_NAME(edsa_netdev_ops), +#endif +}; + +module_dsa_tag_drivers(dsa_tag_drivers); + +MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c deleted file mode 100644 index e8eaa804ccb9..000000000000 --- a/net/dsa/tag_edsa.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * net/dsa/tag_edsa.c - Ethertype DSA tagging - * Copyright (c) 2008-2009 Marvell Semiconductor - */ - -#include <linux/etherdevice.h> -#include <linux/list.h> -#include <linux/slab.h> - -#include "dsa_priv.h" - -#define DSA_HLEN 4 -#define EDSA_HLEN 8 - -static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - u8 *edsa_header; - - /* - * Convert the outermost 802.1q tag to a DSA tag and prepend - * a DSA ethertype field is the packet is tagged, or insert - * a DSA ethertype plus DSA tag between the addresses and the - * current ethertype field if the packet is untagged. - */ - if (skb->protocol == htons(ETH_P_8021Q)) { - if (skb_cow_head(skb, DSA_HLEN) < 0) - return NULL; - skb_push(skb, DSA_HLEN); - - memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); - - /* - * Construct tagged FROM_CPU DSA tag from 802.1q tag. - */ - edsa_header = skb->data + 2 * ETH_ALEN; - edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; - edsa_header[1] = ETH_P_EDSA & 0xff; - edsa_header[2] = 0x00; - edsa_header[3] = 0x00; - edsa_header[4] = 0x60 | dp->ds->index; - edsa_header[5] = dp->index << 3; - - /* - * Move CFI field from byte 6 to byte 5. - */ - if (edsa_header[6] & 0x10) { - edsa_header[5] |= 0x01; - edsa_header[6] &= ~0x10; - } - } else { - if (skb_cow_head(skb, EDSA_HLEN) < 0) - return NULL; - skb_push(skb, EDSA_HLEN); - - memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); - - /* - * Construct untagged FROM_CPU DSA tag. - */ - edsa_header = skb->data + 2 * ETH_ALEN; - edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; - edsa_header[1] = ETH_P_EDSA & 0xff; - edsa_header[2] = 0x00; - edsa_header[3] = 0x00; - edsa_header[4] = 0x40 | dp->ds->index; - edsa_header[5] = dp->index << 3; - edsa_header[6] = 0x00; - edsa_header[7] = 0x00; - } - - return skb; -} - -static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) -{ - u8 *edsa_header; - int source_device; - int source_port; - - if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) - return NULL; - - /* - * Skip the two null bytes after the ethertype. - */ - edsa_header = skb->data + 2; - - /* - * Check that frame type is either TO_CPU or FORWARD. - */ - if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) - return NULL; - - /* - * Determine source device and port. - */ - source_device = edsa_header[0] & 0x1f; - source_port = (edsa_header[1] >> 3) & 0x1f; - - skb->dev = dsa_master_find_slave(dev, source_device, source_port); - if (!skb->dev) - return NULL; - - /* - * If the 'tagged' bit is set, convert the DSA tag to a 802.1q - * tag and delete the ethertype part. If the 'tagged' bit is - * clear, delete the ethertype and the DSA tag parts. - */ - if (edsa_header[0] & 0x20) { - u8 new_header[4]; - - /* - * Insert 802.1q ethertype and copy the VLAN-related - * fields, but clear the bit that will hold CFI (since - * DSA uses that bit location for another purpose). - */ - new_header[0] = (ETH_P_8021Q >> 8) & 0xff; - new_header[1] = ETH_P_8021Q & 0xff; - new_header[2] = edsa_header[2] & ~0x10; - new_header[3] = edsa_header[3]; - - /* - * Move CFI bit from its place in the DSA header to - * its 802.1q-designated place. - */ - if (edsa_header[1] & 0x01) - new_header[2] |= 0x10; - - skb_pull_rcsum(skb, DSA_HLEN); - - /* - * Update packet checksum if skb is CHECKSUM_COMPLETE. - */ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - __wsum c = skb->csum; - c = csum_add(c, csum_partial(new_header + 2, 2, 0)); - c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0)); - skb->csum = c; - } - - memcpy(edsa_header, new_header, DSA_HLEN); - - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - DSA_HLEN, - 2 * ETH_ALEN); - } else { - /* - * Remove DSA tag and update checksum. - */ - skb_pull_rcsum(skb, EDSA_HLEN); - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - EDSA_HLEN, - 2 * ETH_ALEN); - } - - skb->offload_fwd_mark = 1; - - return skb; -} - -static int edsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, - int *offset) -{ - *offset = 8; - *proto = ((__be16 *)skb->data)[3]; - return 0; -} - -static const struct dsa_device_ops edsa_netdev_ops = { - .name = "edsa", - .proto = DSA_TAG_PROTO_EDSA, - .xmit = edsa_xmit, - .rcv = edsa_rcv, - .flow_dissect = edsa_tag_flow_dissect, - .overhead = EDSA_HLEN, -}; - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_EDSA); - -module_dsa_tag_driver(edsa_netdev_ops); diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 408d4af390a0..df7140984da3 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -60,13 +60,8 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - int err; u8 *gswip_tag; - err = skb_cow_head(skb, GSWIP_TX_HEADER_LEN); - if (err) - return NULL; - skb_push(skb, GSWIP_TX_HEADER_LEN); gswip_tag = skb->data; @@ -80,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, } static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int port; u8 *gswip_tag; @@ -108,7 +102,7 @@ static const struct dsa_device_ops gswip_netdev_ops = { .proto = DSA_TAG_PROTO_GSWIP, .xmit = gswip_tag_xmit, .rcv = gswip_tag_rcv, - .overhead = GSWIP_RX_HEADER_LEN, + .needed_headroom = GSWIP_RX_HEADER_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c new file mode 100644 index 000000000000..846588c0070a --- /dev/null +++ b/net/dsa/tag_hellcreek.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * net/dsa/tag_hellcreek.c - Hirschmann Hellcreek switch tag format handling + * + * Copyright (C) 2019,2020 Linutronix GmbH + * Author Kurt Kanzenbach <kurt@linutronix.de> + * + * Based on tag_ksz.c. + */ + +#include <linux/skbuff.h> +#include <net/dsa.h> + +#include "dsa_priv.h" + +#define HELLCREEK_TAG_LEN 1 + +static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *tag; + + /* Calculate checksums (if required) before adding the trailer tag to + * avoid including it in calculations. That would lead to wrong + * checksums after the switch strips the tag. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + return NULL; + + /* Tag encoding */ + tag = skb_put(skb, HELLCREEK_TAG_LEN); + *tag = BIT(dp->index); + + return skb; +} + +static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + /* Tag decoding */ + u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; + unsigned int port = tag[0] & 0x03; + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + netdev_warn_once(dev, "Failed to get source port: %d\n", port); + return NULL; + } + + pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops hellcreek_netdev_ops = { + .name = "hellcreek", + .proto = DSA_TAG_PROTO_HELLCREEK, + .xmit = hellcreek_xmit, + .rcv = hellcreek_rcv, + .needed_tailroom = HELLCREEK_TAG_LEN, +}; + +MODULE_LICENSE("Dual MIT/GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_HELLCREEK); + +module_dsa_tag_driver(hellcreek_netdev_ops); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 90d055c4df9e..38fa19c1e2d5 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -6,7 +6,6 @@ #include <linux/etherdevice.h> #include <linux/list.h> -#include <linux/slab.h> #include <net/dsa.h> #include "dsa_priv.h" @@ -14,46 +13,6 @@ #define KSZ_EGRESS_TAG_LEN 1 #define KSZ_INGRESS_TAG_LEN 1 -static struct sk_buff *ksz_common_xmit(struct sk_buff *skb, - struct net_device *dev, int len) -{ - struct sk_buff *nskb; - int padlen; - - padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len; - - if (skb_tailroom(skb) >= padlen + len) { - /* Let dsa_slave_xmit() free skb */ - if (__skb_put_padto(skb, skb->len + padlen, false)) - return NULL; - - nskb = skb; - } else { - nskb = alloc_skb(NET_IP_ALIGN + skb->len + - padlen + len, GFP_ATOMIC); - if (!nskb) - return NULL; - skb_reserve(nskb, NET_IP_ALIGN); - - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, - skb_network_header(skb) - skb->head); - skb_set_transport_header(nskb, - skb_transport_header(skb) - skb->head); - skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - - /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free - * skb - */ - if (skb_put_padto(nskb, nskb->len + padlen)) - return NULL; - - consume_skb(skb); - } - - return nskb; -} - static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, struct net_device *dev, unsigned int port, unsigned int len) @@ -64,7 +23,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - len); - skb->offload_fwd_mark = true; + dsa_default_offload_fwd_mark(skb); return skb; } @@ -90,27 +49,24 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; u8 *tag; u8 *addr; - nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN); - if (!nskb) + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) return NULL; /* Tag encoding */ - tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); - addr = skb_mac_header(nskb); + tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); + addr = skb_mac_header(skb); *tag = 1 << dp->index; if (is_link_local_ether_addr(addr)) *tag |= KSZ8795_TAIL_TAG_OVERRIDE; - return nskb; + return skb; } -static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; @@ -122,7 +78,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ8795, .xmit = ksz8795_xmit, .rcv = ksz8795_rcv, - .overhead = KSZ_INGRESS_TAG_LEN, + .needed_tailroom = KSZ_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz8795_netdev_ops); @@ -155,30 +111,28 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; - u16 *tag; + __be16 *tag; u8 *addr; + u16 val; - nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN); - if (!nskb) + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) return NULL; /* Tag encoding */ - tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN); - addr = skb_mac_header(nskb); + tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); + addr = skb_mac_header(skb); - *tag = BIT(dp->index); + val = BIT(dp->index); if (is_link_local_ether_addr(addr)) - *tag |= KSZ9477_TAIL_TAG_OVERRIDE; + val |= KSZ9477_TAIL_TAG_OVERRIDE; - *tag = cpu_to_be16(*tag); + *tag = cpu_to_be16(val); - return nskb; + return skb; } -static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; @@ -197,7 +151,7 @@ static const struct dsa_device_ops ksz9477_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ9477, .xmit = ksz9477_xmit, .rcv = ksz9477_rcv, - .overhead = KSZ9477_INGRESS_TAG_LEN, + .needed_tailroom = KSZ9477_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz9477_netdev_ops); @@ -210,24 +164,22 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; u8 *addr; u8 *tag; - nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN); - if (!nskb) + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) return NULL; /* Tag encoding */ - tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); - addr = skb_mac_header(nskb); + tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); + addr = skb_mac_header(skb); *tag = BIT(dp->index); if (is_link_local_ether_addr(addr)) *tag |= KSZ9893_TAIL_TAG_OVERRIDE; - return nskb; + return skb; } static const struct dsa_device_ops ksz9893_netdev_ops = { @@ -235,16 +187,75 @@ static const struct dsa_device_ops ksz9893_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ9893, .xmit = ksz9893_xmit, .rcv = ksz9477_rcv, - .overhead = KSZ_INGRESS_TAG_LEN, + .needed_tailroom = KSZ_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz9893_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893); +/* For xmit, 2 bytes are added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : represents tag override, lookup and valid + * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x80=port8) + * + * For rcv, 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : zero-based value represents port + * (eg, 0x00=port1, 0x02=port3, 0x07=port8) + */ +#define LAN937X_EGRESS_TAG_LEN 2 + +#define LAN937X_TAIL_TAG_BLOCKING_OVERRIDE BIT(11) +#define LAN937X_TAIL_TAG_LOOKUP BIT(12) +#define LAN937X_TAIL_TAG_VALID BIT(13) +#define LAN937X_TAIL_TAG_PORT_MASK 7 + +static struct sk_buff *lan937x_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + const struct ethhdr *hdr = eth_hdr(skb); + __be16 *tag; + u16 val; + + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + + tag = skb_put(skb, LAN937X_EGRESS_TAG_LEN); + + val = BIT(dp->index); + + if (is_link_local_ether_addr(hdr->h_dest)) + val |= LAN937X_TAIL_TAG_BLOCKING_OVERRIDE; + + /* Tail tag valid bit - This bit should always be set by the CPU */ + val |= LAN937X_TAIL_TAG_VALID; + + put_unaligned_be16(val, tag); + + return skb; +} + +static const struct dsa_device_ops lan937x_netdev_ops = { + .name = "lan937x", + .proto = DSA_TAG_PROTO_LAN937X, + .xmit = lan937x_xmit, + .rcv = ksz9477_rcv, + .needed_tailroom = LAN937X_EGRESS_TAG_LEN, +}; + +DSA_TAG_DRIVER(lan937x_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_LAN937X); + static struct dsa_tag_driver *dsa_tag_driver_array[] = { &DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops), &DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops), &DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops), + &DSA_TAG_DRIVER_NAME(lan937x_netdev_ops), }; module_dsa_tag_drivers(dsa_tag_driver_array); diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index eb0e7a32e53d..98d7d7120bab 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -55,37 +55,28 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - u16 *lan9303_tag; - - /* insert a special VLAN tag between the MAC addresses - * and the current ethertype field. - */ - if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) { - dev_dbg(&dev->dev, - "Cannot make room for the special tag. Dropping packet\n"); - return NULL; - } + __be16 *lan9303_tag; + u16 tag; /* provide 'LAN9303_TAG_LEN' bytes additional space */ skb_push(skb, LAN9303_TAG_LEN); /* make room between MACs and Ether-Type */ - memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, LAN9303_TAG_LEN); + + lan9303_tag = dsa_etype_header_pos_tx(skb); - lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); + tag = lan9303_xmit_use_arl(dp, skb->data) ? + LAN9303_TAG_TX_USE_ALR : + dp->index | LAN9303_TAG_TX_STP_OVERRIDE; lan9303_tag[0] = htons(ETH_P_8021Q); - lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ? - LAN9303_TAG_TX_USE_ALR : - dp->index | LAN9303_TAG_TX_STP_OVERRIDE; - lan9303_tag[1] = htons(lan9303_tag[1]); + lan9303_tag[1] = htons(tag); return skb; } -static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) { - u16 *lan9303_tag; u16 lan9303_tag1; unsigned int source_port; @@ -95,20 +86,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; } - /* '->data' points into the middle of our special VLAN tag information: - * - * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type - * ^ - * ->data - */ - lan9303_tag = (u16 *)(skb->data - 2); - - if (lan9303_tag[0] != htons(ETH_P_8021Q)) { - dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n"); - return NULL; + if (skb_vlan_tag_present(skb)) { + lan9303_tag1 = skb_vlan_tag_get(skb); + __vlan_hwaccel_clear_tag(skb); + } else { + skb_push_rcsum(skb, ETH_HLEN); + __skb_vlan_pop(skb, &lan9303_tag1); + skb_pull_rcsum(skb, ETH_HLEN); } - lan9303_tag1 = ntohs(lan9303_tag[1]); source_port = lan9303_tag1 & 0x3; skb->dev = dsa_master_find_slave(dev, 0, source_port); @@ -117,13 +103,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; } - /* remove the special VLAN tag between the MAC addresses - * and the current ethertype field. - */ - skb_pull_rcsum(skb, 2 + 2); - memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), - 2 * ETH_ALEN); - skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU); + if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU)) + dsa_default_offload_fwd_mark(skb); return skb; } @@ -133,7 +114,7 @@ static const struct dsa_device_ops lan9303_netdev_ops = { .proto = DSA_TAG_PROTO_LAN9303, .xmit = lan9303_xmit, .rcv = lan9303_rcv, - .overhead = LAN9303_TAG_LEN, + .needed_headroom = LAN9303_TAG_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index b5705cba8318..415d8ece242a 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,15 +13,17 @@ #define MTK_HDR_LEN 4 #define MTK_HDR_XMIT_UNTAGGED 0 #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 +#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) +#define MTK_HDR_XMIT_SA_DIS BIT(6) static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 xmit_tpid; u8 *mtk_tag; - bool is_vlan_skb = true; /* Build the special tag after the MAC Source Address. If VLAN header * is present, it's required that VLAN header and special tag is @@ -29,26 +31,29 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, * the both special and VLAN tag at the same time and then look up VLAN * table with VID. */ - if (!skb_vlan_tagged(skb)) { - if (skb_cow_head(skb, MTK_HDR_LEN) < 0) - return NULL; - + switch (skb->protocol) { + case htons(ETH_P_8021Q): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100; + break; + case htons(ETH_P_8021AD): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8; + break; + default: + xmit_tpid = MTK_HDR_XMIT_UNTAGGED; skb_push(skb, MTK_HDR_LEN); - memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); - is_vlan_skb = false; + dsa_alloc_etype_header(skb, MTK_HDR_LEN); } - mtk_tag = skb->data + 2 * ETH_ALEN; + mtk_tag = dsa_etype_header_pos_tx(skb); /* Mark tag attribute on special tag insertion to notify hardware * whether that's a combined special tag with 802.1Q header. */ - mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 : - MTK_HDR_XMIT_UNTAGGED; + mtk_tag[0] = xmit_tpid; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; /* Tag control information is kept for 802.1Q */ - if (!is_vlan_skb) { + if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) { mtk_tag[2] = 0; mtk_tag[3] = 0; } @@ -56,28 +61,22 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) { + u16 hdr; int port; - __be16 *phdr, hdr; + __be16 *phdr; if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) return NULL; - /* The MTK header is added by the switch between src addr - * and ethertype at this point, skb->data points to 2 bytes - * after src addr so header should be 2 bytes right before. - */ - phdr = (__be16 *)(skb->data - 2); + phdr = dsa_etype_header_pos_rx(skb); hdr = ntohs(*phdr); /* Remove MTK tag and recalculate checksum. */ skb_pull_rcsum(skb, MTK_HDR_LEN); - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - MTK_HDR_LEN, - 2 * ETH_ALEN); + dsa_strip_etype_header(skb, MTK_HDR_LEN); /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); @@ -86,16 +85,9 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; - return skb; -} + dsa_default_offload_fwd_mark(skb); -static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, - int *offset) -{ - *offset = 4; - *proto = ((__be16 *)skb->data)[1]; - - return 0; + return skb; } static const struct dsa_device_ops mtk_netdev_ops = { @@ -103,8 +95,7 @@ static const struct dsa_device_ops mtk_netdev_ops = { .proto = DSA_TAG_PROTO_MTK, .xmit = mtk_tag_xmit, .rcv = mtk_tag_rcv, - .flow_dissect = mtk_tag_flow_dissect, - .overhead = MTK_HDR_LEN, + .needed_headroom = MTK_HDR_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 8e3e7283d430..0d81f172b7a6 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -1,198 +1,121 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019 NXP */ -#include <soc/mscc/ocelot.h> -#include <linux/packing.h> +#include <linux/dsa/ocelot.h> #include "dsa_priv.h" -/* The CPU injection header and the CPU extraction header can have 3 types of - * prefixes: long, short and no prefix. The format of the header itself is the - * same in all 3 cases. - * - * Extraction with long prefix: - * - * +-------------------+-------------------+------+------+------------+-------+ - * | ff:ff:ff:ff:ff:ff | ff:ff:ff:ff:ff:ff | 8880 | 000a | extraction | frame | - * | | | | | header | | - * +-------------------+-------------------+------+------+------------+-------+ - * 48 bits 48 bits 16 bits 16 bits 128 bits - * - * Extraction with short prefix: - * - * +------+------+------------+-------+ - * | 8880 | 000a | extraction | frame | - * | | | header | | - * +------+------+------------+-------+ - * 16 bits 16 bits 128 bits - * - * Extraction with no prefix: - * - * +------------+-------+ - * | extraction | frame | - * | header | | - * +------------+-------+ - * 128 bits - * - * - * Injection with long prefix: - * - * +-------------------+-------------------+------+------+------------+-------+ - * | any dmac | any smac | 8880 | 000a | injection | frame | - * | | | | | header | | - * +-------------------+-------------------+------+------+------------+-------+ - * 48 bits 48 bits 16 bits 16 bits 128 bits - * - * Injection with short prefix: - * - * +------+------+------------+-------+ - * | 8880 | 000a | injection | frame | - * | | | header | | - * +------+------+------------+-------+ - * 16 bits 16 bits 128 bits - * - * Injection with no prefix: - * - * +------------+-------+ - * | injection | frame | - * | header | | - * +------------+-------+ - * 128 bits - * - * The injection header looks like this (network byte order, bit 127 - * is part of lowest address byte in memory, bit 0 is part of highest - * address byte): - * - * +------+------+------+------+------+------+------+------+ - * 127:120 |BYPASS| MASQ | MASQ_PORT |REW_OP|REW_OP| - * +------+------+------+------+------+------+------+------+ - * 119:112 | REW_OP | - * +------+------+------+------+------+------+------+------+ - * 111:104 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 103: 96 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 95: 88 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 87: 80 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 79: 72 | RSV | - * +------+------+------+------+------+------+------+------+ - * 71: 64 | RSV | DEST | - * +------+------+------+------+------+------+------+------+ - * 63: 56 | DEST | - * +------+------+------+------+------+------+------+------+ - * 55: 48 | RSV | - * +------+------+------+------+------+------+------+------+ - * 47: 40 | RSV | SRC_PORT | RSV |TFRM_TIMER| - * +------+------+------+------+------+------+------+------+ - * 39: 32 | TFRM_TIMER | RSV | - * +------+------+------+------+------+------+------+------+ - * 31: 24 | RSV | DP | POP_CNT | CPUQ | - * +------+------+------+------+------+------+------+------+ - * 23: 16 | CPUQ | QOS_CLASS |TAG_TYPE| - * +------+------+------+------+------+------+------+------+ - * 15: 8 | PCP | DEI | VID | - * +------+------+------+------+------+------+------+------+ - * 7: 0 | VID | - * +------+------+------+------+------+------+------+------+ - * - * And the extraction header looks like this: - * - * +------+------+------+------+------+------+------+------+ - * 127:120 | RSV | REW_OP | - * +------+------+------+------+------+------+------+------+ - * 119:112 | REW_OP | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 111:104 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 103: 96 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 95: 88 | REW_VAL | - * +------+------+------+------+------+------+------+------+ - * 87: 80 | REW_VAL | LLEN | - * +------+------+------+------+------+------+------+------+ - * 79: 72 | LLEN | WLEN | - * +------+------+------+------+------+------+------+------+ - * 71: 64 | WLEN | RSV | - * +------+------+------+------+------+------+------+------+ - * 63: 56 | RSV | - * +------+------+------+------+------+------+------+------+ - * 55: 48 | RSV | - * +------+------+------+------+------+------+------+------+ - * 47: 40 | RSV | SRC_PORT | ACL_ID | - * +------+------+------+------+------+------+------+------+ - * 39: 32 | ACL_ID | RSV | SFLOW_ID | - * +------+------+------+------+------+------+------+------+ - * 31: 24 |ACL_HIT| DP | LRN_FLAGS | CPUQ | - * +------+------+------+------+------+------+------+------+ - * 23: 16 | CPUQ | QOS_CLASS |TAG_TYPE| - * +------+------+------+------+------+------+------+------+ - * 15: 8 | PCP | DEI | VID | - * +------+------+------+------+------+------+------+------+ - * 7: 0 | VID | - * +------+------+------+------+------+------+------+------+ +/* If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone and VLAN-unaware bridge ports. */ +static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, + u64 *vlan_tci, u64 *tag_type) +{ + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } -static struct sk_buff *ocelot_xmit(struct sk_buff *skb, - struct net_device *netdev) + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + __skb_vlan_pop(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + +static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, + __be32 ifh_prefix, void **ifh) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u64 bypass, dest, src, qos_class, rew_op; struct dsa_switch *ds = dp->ds; - int port = dp->index; - struct ocelot *ocelot = ds->priv; - struct ocelot_port *ocelot_port = ocelot->ports[port]; - u8 *injection; + u64 vlan_tci, tag_type; + void *injection; + __be32 *prefix; + u32 rew_op = 0; + u64 qos_class; - if (unlikely(skb_cow_head(skb, OCELOT_TAG_LEN) < 0)) { - netdev_err(netdev, "Cannot make room for tag.\n"); - return NULL; - } + ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + + qos_class = netdev_get_num_tc(netdev) ? + netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; injection = skb_push(skb, OCELOT_TAG_LEN); + prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN); + *prefix = ifh_prefix; memset(injection, 0, OCELOT_TAG_LEN); + ocelot_ifh_set_bypass(injection, 1); + ocelot_ifh_set_src(injection, ds->num_ports); + ocelot_ifh_set_qos_class(injection, qos_class); + ocelot_ifh_set_vlan_tci(injection, vlan_tci); + ocelot_ifh_set_tag_type(injection, tag_type); - src = dsa_upstream_port(ds, port); - dest = BIT(port); - bypass = true; - qos_class = skb->priority; + rew_op = ocelot_ptp_rew_op(skb); + if (rew_op) + ocelot_ifh_set_rew_op(injection, rew_op); - packing(injection, &bypass, 127, 127, OCELOT_TAG_LEN, PACK, 0); - packing(injection, &dest, 68, 56, OCELOT_TAG_LEN, PACK, 0); - packing(injection, &src, 46, 43, OCELOT_TAG_LEN, PACK, 0); - packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); + *ifh = injection; +} - if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } +static struct sk_buff *ocelot_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + void *injection; - packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); - } + ocelot_xmit_common(skb, netdev, cpu_to_be32(0x8880000a), &injection); + ocelot_ifh_set_dest(injection, BIT_ULL(dp->index)); + + return skb; +} + +static struct sk_buff *seville_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + void *injection; + + ocelot_xmit_common(skb, netdev, cpu_to_be32(0x88800005), &injection); + seville_ifh_set_dest(injection, BIT_ULL(dp->index)); return skb; } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { u64 src_port, qos_class; + u64 vlan_tci, tag_type; u8 *start = skb->data; + struct dsa_port *dp; u8 *extraction; + u16 vlan_tpid; + u64 rew_val; /* Revert skb->data by the amount consumed by the DSA master, * so it points to the beginning of the frame. */ skb_push(skb, ETH_HLEN); - /* We don't care about the long prefix, it is just for easy entrance + /* We don't care about the short prefix, it is just for easy entrance * into the DSA master's RX filter. Discard it now by moving it into * the headroom. */ - skb_pull(skb, OCELOT_LONG_PREFIX_LEN); + skb_pull(skb, OCELOT_SHORT_PREFIX_LEN); /* And skb->data now points to the extraction frame header. * Keep a pointer to it. */ @@ -206,10 +129,13 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, skb_pull(skb, ETH_HLEN); /* Remove from inet csum the extraction header */ - skb_postpull_rcsum(skb, start, OCELOT_LONG_PREFIX_LEN + OCELOT_TAG_LEN); + skb_postpull_rcsum(skb, start, OCELOT_TOTAL_TAG_LEN); - packing(extraction, &src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0); - packing(extraction, &qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0); + ocelot_xfh_get_src_port(extraction, &src_port); + ocelot_xfh_get_qos_class(extraction, &qos_class); + ocelot_xfh_get_tag_type(extraction, &tag_type); + ocelot_xfh_get_vlan_tci(extraction, &vlan_tci); + ocelot_xfh_get_rew_val(extraction, &rew_val); skb->dev = dsa_master_find_slave(netdev, 0, src_port); if (!skb->dev) @@ -221,21 +147,70 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, */ return NULL; - skb->offload_fwd_mark = 1; + dsa_default_offload_fwd_mark(skb); skb->priority = qos_class; + OCELOT_SKB_CB(skb)->tstamp_lo = rew_val; + + /* Ocelot switches copy frames unmodified to the CPU. However, it is + * possible for the user to request a VLAN modification through + * VCAP_IS1_ACT_VID_REPLACE_ENA. In this case, what will happen is that + * the VLAN ID field from the Extraction Header gets updated, but the + * 802.1Q header does not (the classified VLAN only becomes visible on + * egress through the "port tag" of front-panel ports). + * So, for traffic extracted by the CPU, we want to pick up the + * classified VLAN and manually replace the existing 802.1Q header from + * the packet with it, so that the operating system is always up to + * date with the result of tc-vlan actions. + * NOTE: In VLAN-unaware mode, we don't want to do that, we want the + * frame to remain unmodified, because the classified VLAN is always + * equal to the pvid of the ingress port and should not be used for + * processing. + */ + dp = dsa_slave_to_port(skb->dev); + vlan_tpid = tag_type ? ETH_P_8021AD : ETH_P_8021Q; + + if (dsa_port_is_vlan_filtering(dp) && + eth_hdr(skb)->h_proto == htons(vlan_tpid)) { + u16 dummy_vlan_tci; + + skb_push_rcsum(skb, ETH_HLEN); + __skb_vlan_pop(skb, &dummy_vlan_tci); + skb_pull_rcsum(skb, ETH_HLEN); + __vlan_hwaccel_put_tag(skb, htons(vlan_tpid), vlan_tci); + } return skb; } -static struct dsa_device_ops ocelot_netdev_ops = { +static const struct dsa_device_ops ocelot_netdev_ops = { .name = "ocelot", .proto = DSA_TAG_PROTO_OCELOT, .xmit = ocelot_xmit, .rcv = ocelot_rcv, - .overhead = OCELOT_TAG_LEN + OCELOT_LONG_PREFIX_LEN, + .needed_headroom = OCELOT_TOTAL_TAG_LEN, + .promisc_on_master = true, }; -MODULE_LICENSE("GPL v2"); +DSA_TAG_DRIVER(ocelot_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT); -module_dsa_tag_driver(ocelot_netdev_ops); +static const struct dsa_device_ops seville_netdev_ops = { + .name = "seville", + .proto = DSA_TAG_PROTO_SEVILLE, + .xmit = seville_xmit, + .rcv = ocelot_rcv, + .needed_headroom = OCELOT_TOTAL_TAG_LEN, + .promisc_on_master = true, +}; + +DSA_TAG_DRIVER(seville_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SEVILLE); + +static struct dsa_tag_driver *ocelot_tag_driver_array[] = { + &DSA_TAG_DRIVER_NAME(ocelot_netdev_ops), + &DSA_TAG_DRIVER_NAME(seville_netdev_ops), +}; + +module_dsa_tag_drivers(ocelot_tag_driver_array); + +MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c new file mode 100644 index 000000000000..37ccf00404ea --- /dev/null +++ b/net/dsa/tag_ocelot_8021q.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2020-2021 NXP + * + * An implementation of the software-defined tag_8021q.c tagger format, which + * also preserves full functionality under a vlan_filtering bridge. It does + * this by using the TCAM engines for: + * - pushing the RX VLAN as a second, outer tag, on egress towards the CPU port + * - redirecting towards the correct front port based on TX VLAN and popping + * that on egress + */ +#include <linux/dsa/8021q.h> +#include <linux/dsa/ocelot.h> +#include "dsa_priv.h" + +struct ocelot_8021q_tagger_private { + struct ocelot_8021q_tagger_data data; /* Must be first */ + struct kthread_worker *xmit_worker; +}; + +static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, + struct sk_buff *skb) +{ + struct ocelot_8021q_tagger_private *priv = dp->ds->tagger_data; + struct ocelot_8021q_tagger_data *data = &priv->data; + void (*xmit_work_fn)(struct kthread_work *work); + struct felix_deferred_xmit_work *xmit_work; + struct kthread_worker *xmit_worker; + + xmit_work_fn = data->xmit_work_fn; + xmit_worker = priv->xmit_worker; + + if (!xmit_work_fn || !xmit_worker) + return NULL; + + /* PTP over IP packets need UDP checksumming. We may have inherited + * NETIF_F_HW_CSUM from the DSA master, but these packets are not sent + * through the DSA master, so calculate the checksum here. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + + xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC); + if (!xmit_work) + return NULL; + + /* Calls felix_port_deferred_xmit in felix.c */ + kthread_init_work(&xmit_work->work, xmit_work_fn); + /* Increase refcount so the kfree_skb in dsa_slave_xmit + * won't really free the packet. + */ + xmit_work->dp = dp; + xmit_work->skb = skb_get(skb); + + kthread_queue_work(xmit_worker, &xmit_work->work); + + return NULL; +} + +static struct sk_buff *ocelot_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + u16 queue_mapping = skb_get_queue_mapping(skb); + u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); + struct ethhdr *hdr = eth_hdr(skb); + + if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest)) + return ocelot_defer_xmit(dp, skb); + + return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff *ocelot_rcv(struct sk_buff *skb, + struct net_device *netdev) +{ + int src_port, switch_id; + + dsa_8021q_rcv(skb, &src_port, &switch_id, NULL); + + skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); + if (!skb->dev) + return NULL; + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static void ocelot_disconnect(struct dsa_switch *ds) +{ + struct ocelot_8021q_tagger_private *priv = ds->tagger_data; + + kthread_destroy_worker(priv->xmit_worker); + kfree(priv); + ds->tagger_data = NULL; +} + +static int ocelot_connect(struct dsa_switch *ds) +{ + struct ocelot_8021q_tagger_private *priv; + int err; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->xmit_worker = kthread_create_worker(0, "felix_xmit"); + if (IS_ERR(priv->xmit_worker)) { + err = PTR_ERR(priv->xmit_worker); + kfree(priv); + return err; + } + + ds->tagger_data = priv; + + return 0; +} + +static const struct dsa_device_ops ocelot_8021q_netdev_ops = { + .name = "ocelot-8021q", + .proto = DSA_TAG_PROTO_OCELOT_8021Q, + .xmit = ocelot_xmit, + .rcv = ocelot_rcv, + .connect = ocelot_connect, + .disconnect = ocelot_disconnect, + .needed_headroom = VLAN_HLEN, + .promisc_on_master = true, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT_8021Q); + +module_dsa_tag_driver(ocelot_8021q_netdev_ops); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 70db7c909f74..57d2e00f1e5d 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -4,81 +4,81 @@ */ #include <linux/etherdevice.h> +#include <linux/bitfield.h> +#include <net/dsa.h> +#include <linux/dsa/tag_qca.h> #include "dsa_priv.h" -#define QCA_HDR_LEN 2 -#define QCA_HDR_VERSION 0x2 - -#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14) -#define QCA_HDR_RECV_VERSION_S 14 -#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11) -#define QCA_HDR_RECV_PRIORITY_S 11 -#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6) -#define QCA_HDR_RECV_TYPE_S 6 -#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) -#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) - -#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14) -#define QCA_HDR_XMIT_VERSION_S 14 -#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11) -#define QCA_HDR_XMIT_PRIORITY_S 11 -#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8) -#define QCA_HDR_XMIT_CONTROL_S 8 -#define QCA_HDR_XMIT_FROM_CPU BIT(7) -#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0) - static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - u16 *phdr, hdr; - - if (skb_cow_head(skb, QCA_HDR_LEN) < 0) - return NULL; + __be16 *phdr; + u16 hdr; skb_push(skb, QCA_HDR_LEN); - memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); - phdr = (u16 *)(skb->data + 2 * ETH_ALEN); + dsa_alloc_etype_header(skb, QCA_HDR_LEN); + phdr = dsa_etype_header_pos_tx(skb); /* Set the version field, and set destination port information */ - hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | - QCA_HDR_XMIT_FROM_CPU | BIT(dp->index); + hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION); + hdr |= QCA_HDR_XMIT_FROM_CPU; + hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(dp->index)); *phdr = htons(hdr); return skb; } -static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) { - u8 ver; + struct qca_tagger_data *tagger_data; + struct dsa_port *dp = dev->dsa_ptr; + struct dsa_switch *ds = dp->ds; + u8 ver, pk_type; + __be16 *phdr; int port; - __be16 *phdr, hdr; + u16 hdr; + + BUILD_BUG_ON(sizeof(struct qca_mgmt_ethhdr) != QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN); + + tagger_data = ds->tagger_data; if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) return NULL; - /* The QCA header is added by the switch between src addr and Ethertype - * At this point, skb->data points to ethertype so header should be - * right before - */ - phdr = (__be16 *)(skb->data - 2); + phdr = dsa_etype_header_pos_rx(skb); hdr = ntohs(*phdr); /* Make sure the version is correct */ - ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; + ver = FIELD_GET(QCA_HDR_RECV_VERSION, hdr); if (unlikely(ver != QCA_HDR_VERSION)) return NULL; + /* Get pk type */ + pk_type = FIELD_GET(QCA_HDR_RECV_TYPE, hdr); + + /* Ethernet mgmt read/write packet */ + if (pk_type == QCA_HDR_RECV_TYPE_RW_REG_ACK) { + if (likely(tagger_data->rw_reg_ack_handler)) + tagger_data->rw_reg_ack_handler(ds, skb); + return NULL; + } + + /* Ethernet MIB counter packet */ + if (pk_type == QCA_HDR_RECV_TYPE_MIB) { + if (likely(tagger_data->mib_autocast_handler)) + tagger_data->mib_autocast_handler(ds, skb); + return NULL; + } + /* Remove QCA tag and recalculate checksum */ skb_pull_rcsum(skb, QCA_HDR_LEN); - memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, - ETH_HLEN - QCA_HDR_LEN); + dsa_strip_etype_header(skb, QCA_HDR_LEN); /* Get source port information */ - port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); + port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr); skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) @@ -87,22 +87,34 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } -static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, - int *offset) +static int qca_tag_connect(struct dsa_switch *ds) { - *offset = QCA_HDR_LEN; - *proto = ((__be16 *)skb->data)[0]; + struct qca_tagger_data *tagger_data; + + tagger_data = kzalloc(sizeof(*tagger_data), GFP_KERNEL); + if (!tagger_data) + return -ENOMEM; + + ds->tagger_data = tagger_data; return 0; } +static void qca_tag_disconnect(struct dsa_switch *ds) +{ + kfree(ds->tagger_data); + ds->tagger_data = NULL; +} + static const struct dsa_device_ops qca_netdev_ops = { .name = "qca", .proto = DSA_TAG_PROTO_QCA, + .connect = qca_tag_connect, + .disconnect = qca_tag_disconnect, .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, - .flow_dissect = qca_tag_flow_dissect, - .overhead = QCA_HDR_LEN, + .needed_headroom = QCA_HDR_LEN, + .promisc_on_master = true, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c new file mode 100644 index 000000000000..6d928ee3ef7a --- /dev/null +++ b/net/dsa/tag_rtl4_a.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handler for Realtek 4 byte DSA switch tags + * Currently only supports protocol "A" found in RTL8366RB + * Copyright (c) 2020 Linus Walleij <linus.walleij@linaro.org> + * + * This "proprietary tag" header looks like so: + * + * ------------------------------------------------- + * | MAC DA | MAC SA | 0x8899 | 2 bytes tag | Type | + * ------------------------------------------------- + * + * The 2 bytes tag form a 16 bit big endian word. The exact + * meaning has been guessed from packet dumps from ingress + * frames. + */ + +#include <linux/etherdevice.h> +#include <linux/bits.h> + +#include "dsa_priv.h" + +#define RTL4_A_HDR_LEN 4 +#define RTL4_A_ETHERTYPE 0x8899 +#define RTL4_A_PROTOCOL_SHIFT 12 +/* + * 0x1 = Realtek Remote Control protocol (RRCP) + * 0x2/0x3 seems to be used for loopback testing + * 0x9 = RTL8306 DSA protocol + * 0xa = RTL8366RB DSA protocol + */ +#define RTL4_A_PROTOCOL_RTL8366RB 0xa + +static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *p; + u8 *tag; + u16 out; + + /* Pad out to at least 60 bytes */ + if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false))) + return NULL; + + netdev_dbg(dev, "add realtek tag to package to port %d\n", + dp->index); + skb_push(skb, RTL4_A_HDR_LEN); + + dsa_alloc_etype_header(skb, RTL4_A_HDR_LEN); + tag = dsa_etype_header_pos_tx(skb); + + /* Set Ethertype */ + p = (__be16 *)tag; + *p = htons(RTL4_A_ETHERTYPE); + + out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT); + /* The lower bits indicate the port number */ + out |= BIT(dp->index); + + p = (__be16 *)(tag + 2); + *p = htons(out); + + return skb; +} + +static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + u16 protport; + __be16 *p; + u16 etype; + u8 *tag; + u8 prot; + u8 port; + + if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN))) + return NULL; + + tag = dsa_etype_header_pos_rx(skb); + p = (__be16 *)tag; + etype = ntohs(*p); + if (etype != RTL4_A_ETHERTYPE) { + /* Not custom, just pass through */ + netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype); + return skb; + } + p = (__be16 *)(tag + 2); + protport = ntohs(*p); + /* The 4 upper bits are the protocol */ + prot = (protport >> RTL4_A_PROTOCOL_SHIFT) & 0x0f; + if (prot != RTL4_A_PROTOCOL_RTL8366RB) { + netdev_err(dev, "unknown realtek protocol 0x%01x\n", prot); + return NULL; + } + port = protport & 0xff; + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + netdev_dbg(dev, "could not find slave for port %d\n", port); + return NULL; + } + + /* Remove RTL4 tag and recalculate checksum */ + skb_pull_rcsum(skb, RTL4_A_HDR_LEN); + + dsa_strip_etype_header(skb, RTL4_A_HDR_LEN); + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops rtl4a_netdev_ops = { + .name = "rtl4a", + .proto = DSA_TAG_PROTO_RTL4_A, + .xmit = rtl4a_tag_xmit, + .rcv = rtl4a_tag_rcv, + .needed_headroom = RTL4_A_HDR_LEN, +}; +module_dsa_tag_driver(rtl4a_netdev_ops); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL4_A); diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c new file mode 100644 index 000000000000..a593ead7ff26 --- /dev/null +++ b/net/dsa/tag_rtl8_4.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handler for Realtek 8 byte switch tags + * + * Copyright (C) 2021 Alvin Šipraga <alsi@bang-olufsen.dk> + * + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence + * named tag_rtl8_4. + * + * This tag has the following format: + * + * 0 7|8 15 + * |-----------------------------------+-----------------------------------|--- + * | (16-bit) | ^ + * | Realtek EtherType [0x8899] | | + * |-----------------------------------+-----------------------------------| 8 + * | (8-bit) | (8-bit) | + * | Protocol [0x04] | REASON | b + * |-----------------------------------+-----------------------------------| y + * | (1) | (1) | (2) | (1) | (3) | (1) | (1) | (1) | (5) | t + * | FID_EN | X | FID | PRI_EN | PRI | KEEP | X | LEARN_DIS | X | e + * |-----------------------------------+-----------------------------------| s + * | (1) | (15-bit) | | + * | ALLOW | TX/RX | v + * |-----------------------------------+-----------------------------------|--- + * + * With the following field descriptions: + * + * field | description + * ------------+------------- + * Realtek | 0x8899: indicates that this is a proprietary Realtek tag; + * EtherType | note that Realtek uses the same EtherType for + * | other incompatible tag formats (e.g. tag_rtl4_a.c) + * Protocol | 0x04: indicates that this tag conforms to this format + * X | reserved + * ------------+------------- + * REASON | reason for forwarding packet to CPU + * | 0: packet was forwarded or flooded to CPU + * | 80: packet was trapped to CPU + * FID_EN | 1: packet has an FID + * | 0: no FID + * FID | FID of packet (if FID_EN=1) + * PRI_EN | 1: force priority of packet + * | 0: don't force priority + * PRI | priority of packet (if PRI_EN=1) + * KEEP | preserve packet VLAN tag format + * LEARN_DIS | don't learn the source MAC address of the packet + * ALLOW | 1: treat TX/RX field as an allowance port mask, meaning the + * | packet may only be forwarded to ports specified in the + * | mask + * | 0: no allowance port mask, TX/RX field is the forwarding + * | port mask + * TX/RX | TX (switch->CPU): port number the packet was received on + * | RX (CPU->switch): forwarding port mask (if ALLOW=0) + * | allowance port mask (if ALLOW=1) + * + * The tag can be positioned before Ethertype, using tag "rtl8_4": + * + * +--------+--------+------------+------+----- + * | MAC DA | MAC SA | 8 byte tag | Type | ... + * +--------+--------+------------+------+----- + * + * The tag can also appear between the end of the payload and before the CRC, + * using tag "rtl8_4t": + * + * +--------+--------+------+-----+---------+------------+-----+ + * | MAC DA | MAC SA | TYPE | ... | payload | 8-byte tag | CRC | + * +--------+--------+------+-----+---------+------------+-----+ + * + * The added bytes after the payload will break most checksums, either in + * software or hardware. To avoid this issue, if the checksum is still pending, + * this tagger checksums the packet in software before adding the tag. + * + */ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/etherdevice.h> + +#include "dsa_priv.h" + +/* Protocols supported: + * + * 0x04 = RTL8365MB DSA protocol + */ + +#define RTL8_4_TAG_LEN 8 + +#define RTL8_4_PROTOCOL GENMASK(15, 8) +#define RTL8_4_PROTOCOL_RTL8365MB 0x04 +#define RTL8_4_REASON GENMASK(7, 0) +#define RTL8_4_REASON_FORWARD 0 +#define RTL8_4_REASON_TRAP 80 + +#define RTL8_4_LEARN_DIS BIT(5) + +#define RTL8_4_TX GENMASK(3, 0) +#define RTL8_4_RX GENMASK(10, 0) + +static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev, + void *tag) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 tag16[RTL8_4_TAG_LEN / 2]; + + /* Set Realtek EtherType */ + tag16[0] = htons(ETH_P_REALTEK); + + /* Set Protocol; zero REASON */ + tag16[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB)); + + /* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */ + tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1)); + + /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */ + tag16[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index))); + + memcpy(tag, tag16, RTL8_4_TAG_LEN); +} + +static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + skb_push(skb, RTL8_4_TAG_LEN); + + dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN); + + rtl8_4_write_tag(skb, dev, dsa_etype_header_pos_tx(skb)); + + return skb; +} + +static struct sk_buff *rtl8_4t_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + /* Calculate the checksum here if not done yet as trailing tags will + * break either software or hardware based checksum + */ + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + + rtl8_4_write_tag(skb, dev, skb_put(skb, RTL8_4_TAG_LEN)); + + return skb; +} + +static int rtl8_4_read_tag(struct sk_buff *skb, struct net_device *dev, + void *tag) +{ + __be16 tag16[RTL8_4_TAG_LEN / 2]; + u16 etype; + u8 reason; + u8 proto; + u8 port; + + memcpy(tag16, tag, RTL8_4_TAG_LEN); + + /* Parse Realtek EtherType */ + etype = ntohs(tag16[0]); + if (unlikely(etype != ETH_P_REALTEK)) { + dev_warn_ratelimited(&dev->dev, + "non-realtek ethertype 0x%04x\n", etype); + return -EPROTO; + } + + /* Parse Protocol */ + proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag16[1])); + if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) { + dev_warn_ratelimited(&dev->dev, + "unknown realtek protocol 0x%02x\n", + proto); + return -EPROTO; + } + + /* Parse REASON */ + reason = FIELD_GET(RTL8_4_REASON, ntohs(tag16[1])); + + /* Parse TX (switch->CPU) */ + port = FIELD_GET(RTL8_4_TX, ntohs(tag16[3])); + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + dev_warn_ratelimited(&dev->dev, + "could not find slave for port %d\n", + port); + return -ENOENT; + } + + if (reason != RTL8_4_REASON_TRAP) + dsa_default_offload_fwd_mark(skb); + + return 0; +} + +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN))) + return NULL; + + if (unlikely(rtl8_4_read_tag(skb, dev, dsa_etype_header_pos_rx(skb)))) + return NULL; + + /* Remove tag and recalculate checksum */ + skb_pull_rcsum(skb, RTL8_4_TAG_LEN); + + dsa_strip_etype_header(skb, RTL8_4_TAG_LEN); + + return skb; +} + +static struct sk_buff *rtl8_4t_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + if (skb_linearize(skb)) + return NULL; + + if (unlikely(rtl8_4_read_tag(skb, dev, skb_tail_pointer(skb) - RTL8_4_TAG_LEN))) + return NULL; + + if (pskb_trim_rcsum(skb, skb->len - RTL8_4_TAG_LEN)) + return NULL; + + return skb; +} + +/* Ethertype version */ +static const struct dsa_device_ops rtl8_4_netdev_ops = { + .name = "rtl8_4", + .proto = DSA_TAG_PROTO_RTL8_4, + .xmit = rtl8_4_tag_xmit, + .rcv = rtl8_4_tag_rcv, + .needed_headroom = RTL8_4_TAG_LEN, +}; + +DSA_TAG_DRIVER(rtl8_4_netdev_ops); + +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4); + +/* Tail version */ +static const struct dsa_device_ops rtl8_4t_netdev_ops = { + .name = "rtl8_4t", + .proto = DSA_TAG_PROTO_RTL8_4T, + .xmit = rtl8_4t_tag_xmit, + .rcv = rtl8_4t_tag_rcv, + .needed_tailroom = RTL8_4_TAG_LEN, +}; + +DSA_TAG_DRIVER(rtl8_4t_netdev_ops); + +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4T); + +static struct dsa_tag_driver *dsa_tag_drivers[] = { + &DSA_TAG_DRIVER_NAME(rtl8_4_netdev_ops), + &DSA_TAG_DRIVER_NAME(rtl8_4t_netdev_ops), +}; +module_dsa_tag_drivers(dsa_tag_drivers); + +MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c new file mode 100644 index 000000000000..e2a5ee6ae688 --- /dev/null +++ b/net/dsa/tag_rzn1_a5psw.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#include <linux/bitfield.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <net/dsa.h> + +#include "dsa_priv.h" + +/* To define the outgoing port and to discover the incoming port a TAG is + * inserted after Src MAC : + * + * Dest MAC Src MAC TAG Type + * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 5 6 7 8 | 1 2 |... + * |<--------------->| + * + * See struct a5psw_tag for layout + */ + +#define ETH_P_DSA_A5PSW 0xE001 +#define A5PSW_TAG_LEN 8 +#define A5PSW_CTRL_DATA_FORCE_FORWARD BIT(0) +/* This is both used for xmit tag and rcv tagging */ +#define A5PSW_CTRL_DATA_PORT GENMASK(3, 0) + +struct a5psw_tag { + __be16 ctrl_tag; + __be16 ctrl_data; + __be16 ctrl_data2_hi; + __be16 ctrl_data2_lo; +}; + +static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct a5psw_tag *ptag; + u32 data2_val; + + BUILD_BUG_ON(sizeof(*ptag) != A5PSW_TAG_LEN); + + /* The Ethernet switch we are interfaced with needs packets to be at + * least 60 bytes otherwise they will be discarded when they enter the + * switch port logic. + */ + if (__skb_put_padto(skb, ETH_ZLEN, false)) + return NULL; + + /* provide 'A5PSW_TAG_LEN' bytes additional space */ + skb_push(skb, A5PSW_TAG_LEN); + + /* make room between MACs and Ether-Type to insert tag */ + dsa_alloc_etype_header(skb, A5PSW_TAG_LEN); + + ptag = dsa_etype_header_pos_tx(skb); + + data2_val = FIELD_PREP(A5PSW_CTRL_DATA_PORT, BIT(dp->index)); + ptag->ctrl_tag = htons(ETH_P_DSA_A5PSW); + ptag->ctrl_data = htons(A5PSW_CTRL_DATA_FORCE_FORWARD); + ptag->ctrl_data2_lo = htons(data2_val); + ptag->ctrl_data2_hi = 0; + + return skb; +} + +static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + struct a5psw_tag *tag; + int port; + + if (unlikely(!pskb_may_pull(skb, A5PSW_TAG_LEN))) { + dev_warn_ratelimited(&dev->dev, + "Dropping packet, cannot pull\n"); + return NULL; + } + + tag = dsa_etype_header_pos_rx(skb); + + if (tag->ctrl_tag != htons(ETH_P_DSA_A5PSW)) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid TAG marker\n"); + return NULL; + } + + port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data)); + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) + return NULL; + + skb_pull_rcsum(skb, A5PSW_TAG_LEN); + dsa_strip_etype_header(skb, A5PSW_TAG_LEN); + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops a5psw_netdev_ops = { + .name = "a5psw", + .proto = DSA_TAG_PROTO_RZN1_A5PSW, + .xmit = a5psw_tag_xmit, + .rcv = a5psw_tag_rcv, + .needed_headroom = A5PSW_TAG_LEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_A5PSW); +module_dsa_tag_driver(a5psw_netdev_ops); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 5366ea430349..83e4136516b0 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -7,6 +7,71 @@ #include <linux/packing.h> #include "dsa_priv.h" +/* Is this a TX or an RX header? */ +#define SJA1110_HEADER_HOST_TO_SWITCH BIT(15) + +/* RX header */ +#define SJA1110_RX_HEADER_IS_METADATA BIT(14) +#define SJA1110_RX_HEADER_HOST_ONLY BIT(13) +#define SJA1110_RX_HEADER_HAS_TRAILER BIT(12) + +/* Trap-to-host format (no trailer present) */ +#define SJA1110_RX_HEADER_SRC_PORT(x) (((x) & GENMASK(7, 4)) >> 4) +#define SJA1110_RX_HEADER_SWITCH_ID(x) ((x) & GENMASK(3, 0)) + +/* Timestamp format (trailer present) */ +#define SJA1110_RX_HEADER_TRAILER_POS(x) ((x) & GENMASK(11, 0)) + +#define SJA1110_RX_TRAILER_SWITCH_ID(x) (((x) & GENMASK(7, 4)) >> 4) +#define SJA1110_RX_TRAILER_SRC_PORT(x) ((x) & GENMASK(3, 0)) + +/* Meta frame format (for 2-step TX timestamps) */ +#define SJA1110_RX_HEADER_N_TS(x) (((x) & GENMASK(8, 4)) >> 4) + +/* TX header */ +#define SJA1110_TX_HEADER_UPDATE_TC BIT(14) +#define SJA1110_TX_HEADER_TAKE_TS BIT(13) +#define SJA1110_TX_HEADER_TAKE_TS_CASC BIT(12) +#define SJA1110_TX_HEADER_HAS_TRAILER BIT(11) + +/* Only valid if SJA1110_TX_HEADER_HAS_TRAILER is false */ +#define SJA1110_TX_HEADER_PRIO(x) (((x) << 7) & GENMASK(10, 7)) +#define SJA1110_TX_HEADER_TSTAMP_ID(x) ((x) & GENMASK(7, 0)) + +/* Only valid if SJA1110_TX_HEADER_HAS_TRAILER is true */ +#define SJA1110_TX_HEADER_TRAILER_POS(x) ((x) & GENMASK(10, 0)) + +#define SJA1110_TX_TRAILER_TSTAMP_ID(x) (((x) << 24) & GENMASK(31, 24)) +#define SJA1110_TX_TRAILER_PRIO(x) (((x) << 21) & GENMASK(23, 21)) +#define SJA1110_TX_TRAILER_SWITCHID(x) (((x) << 12) & GENMASK(15, 12)) +#define SJA1110_TX_TRAILER_DESTPORTS(x) (((x) << 1) & GENMASK(11, 1)) + +#define SJA1110_META_TSTAMP_SIZE 10 + +#define SJA1110_HEADER_LEN 4 +#define SJA1110_RX_TRAILER_LEN 13 +#define SJA1110_TX_TRAILER_LEN 4 +#define SJA1110_MAX_PADDING_LEN 15 + +#define SJA1105_HWTS_RX_EN 0 + +struct sja1105_tagger_private { + struct sja1105_tagger_data data; /* Must be first */ + unsigned long state; + /* Protects concurrent access to the meta state machine + * from taggers running on multiple ports on SMP systems + */ + spinlock_t meta_lock; + struct sk_buff *stampable_skb; + struct kthread_worker *xmit_worker; +}; + +static struct sja1105_tagger_private * +sja1105_tagger_private(struct dsa_switch *ds) +{ + return ds->tagger_data; +} + /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ static inline bool sja1105_is_link_local(const struct sk_buff *skb) { @@ -69,60 +134,213 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) return true; } -/* This is the first time the tagger sees the frame on RX. - * Figure out if we can decode it. - */ -static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) -{ - if (!dsa_port_is_vlan_filtering(dev->dsa_ptr)) - return true; - if (sja1105_is_link_local(skb)) - return true; - if (sja1105_is_meta_frame(skb)) - return true; - return false; -} - /* Calls sja1105_port_deferred_xmit in sja1105_main.c */ -static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, +static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) { + struct sja1105_tagger_data *tagger_data = sja1105_tagger_data(dp->ds); + struct sja1105_tagger_private *priv = sja1105_tagger_private(dp->ds); + void (*xmit_work_fn)(struct kthread_work *work); + struct sja1105_deferred_xmit_work *xmit_work; + struct kthread_worker *xmit_worker; + + xmit_work_fn = tagger_data->xmit_work_fn; + xmit_worker = priv->xmit_worker; + + if (!xmit_work_fn || !xmit_worker) + return NULL; + + xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC); + if (!xmit_work) + return NULL; + + kthread_init_work(&xmit_work->work, xmit_work_fn); /* Increase refcount so the kfree_skb in dsa_slave_xmit * won't really free the packet. */ - skb_queue_tail(&sp->xmit_queue, skb_get(skb)); - kthread_queue_work(sp->xmit_worker, &sp->xmit_work); + xmit_work->dp = dp; + xmit_work->skb = skb_get(skb); + + kthread_queue_work(xmit_worker, &xmit_work->work); return NULL; } +/* Send VLAN tags with a TPID that blends in with whatever VLAN protocol a + * bridge spanning ports of this switch might have. + */ +static u16 sja1105_xmit_tpid(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; + u16 proto; + + /* Since VLAN awareness is global, then if this port is VLAN-unaware, + * all ports are. Use the VLAN-unaware TPID used for tag_8021q. + */ + if (!dsa_port_is_vlan_filtering(dp)) + return ETH_P_SJA1105; + + /* Port is VLAN-aware, so there is a bridge somewhere (a single one, + * we're sure about that). It may not be on this port though, so we + * need to find it. + */ + dsa_switch_for_each_port(other_dp, ds) { + struct net_device *br = dsa_port_bridge_dev_get(other_dp); + + if (!br) + continue; + + /* Error is returned only if CONFIG_BRIDGE_VLAN_FILTERING, + * which seems pointless to handle, as our port cannot become + * VLAN-aware in that case. + */ + br_vlan_get_proto(br, &proto); + + return proto; + } + + WARN_ONCE(1, "Port is VLAN-aware but cannot find associated bridge!\n"); + + return ETH_P_SJA1105; +} + +static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + unsigned int bridge_num = dsa_port_bridge_num_get(dp); + struct net_device *br = dsa_port_bridge_dev_get(dp); + u16 tx_vid; + + /* If the port is under a VLAN-aware bridge, just slide the + * VLAN-tagged packet into the FDB and hope for the best. + * This works because we support a single VLAN-aware bridge + * across the entire dst, and its VLANs cannot be shared with + * any standalone port. + */ + if (br_vlan_enabled(br)) + return skb; + + /* If the port is under a VLAN-unaware bridge, use an imprecise + * TX VLAN that targets the bridge's entire broadcast domain, + * instead of just the specific port. + */ + tx_vid = dsa_tag_8021q_bridge_vid(bridge_num); + + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid); +} + +/* Transform untagged control packets into pvid-tagged control packets so that + * all packets sent by this tagger are VLAN-tagged and we can configure the + * switch to drop untagged packets coming from the DSA master. + */ +static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp, + struct sk_buff *skb, u8 pcp) +{ + __be16 xmit_tpid = htons(sja1105_xmit_tpid(dp)); + struct vlan_ethhdr *hdr; + + /* If VLAN tag is in hwaccel area, move it to the payload + * to deal with both cases uniformly and to ensure that + * the VLANs are added in the right order. + */ + if (unlikely(skb_vlan_tag_present(skb))) { + skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + return NULL; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + + /* If skb is already VLAN-tagged, leave that VLAN ID in place */ + if (hdr->h_vlan_proto == xmit_tpid) + return skb; + + return vlan_insert_tag(skb, xmit_tpid, (pcp << VLAN_PRIO_SHIFT) | + SJA1105_DEFAULT_VLAN); +} + static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); + + if (skb->offload_fwd_mark) + return sja1105_imprecise_xmit(skb, netdev); /* Transmitting management traffic does not rely upon switch tagging, * but instead SPI-installed management routes. Part 2 of this * is the .port_deferred_xmit driver callback. */ - if (unlikely(sja1105_is_link_local(skb))) - return sja1105_defer_xmit(dp->priv, skb); + if (unlikely(sja1105_is_link_local(skb))) { + skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp); + if (!skb) + return NULL; - /* If we are under a vlan_filtering bridge, IP termination on - * switch ports based on 802.1Q tags is simply too brittle to - * be passable. So just defer to the dsa_slave_notag_xmit - * implementation. - */ - if (dsa_port_is_vlan_filtering(dp)) - return skb; + return sja1105_defer_xmit(dp, skb); + } - return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105, + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); } +static struct sk_buff *sja1110_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; + struct dsa_port *dp = dsa_slave_to_port(netdev); + u16 queue_mapping = skb_get_queue_mapping(skb); + u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); + __be32 *tx_trailer; + __be16 *tx_header; + int trailer_pos; + + if (skb->offload_fwd_mark) + return sja1105_imprecise_xmit(skb, netdev); + + /* Transmitting control packets is done using in-band control + * extensions, while data packets are transmitted using + * tag_8021q TX VLANs. + */ + if (likely(!sja1105_is_link_local(skb))) + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); + + skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp); + if (!skb) + return NULL; + + skb_push(skb, SJA1110_HEADER_LEN); + + dsa_alloc_etype_header(skb, SJA1110_HEADER_LEN); + + trailer_pos = skb->len; + + tx_header = dsa_etype_header_pos_tx(skb); + tx_trailer = skb_put(skb, SJA1110_TX_TRAILER_LEN); + + tx_header[0] = htons(ETH_P_SJA1110); + tx_header[1] = htons(SJA1110_HEADER_HOST_TO_SWITCH | + SJA1110_TX_HEADER_HAS_TRAILER | + SJA1110_TX_HEADER_TRAILER_POS(trailer_pos)); + *tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) | + SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) | + SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index))); + if (clone) { + u8 ts_id = SJA1105_SKB_CB(clone)->ts_id; + + tx_header[1] |= htons(SJA1110_TX_HEADER_TAKE_TS); + *tx_trailer |= cpu_to_be32(SJA1110_TX_TRAILER_TSTAMP_ID(ts_id)); + } + + return skb; +} + static void sja1105_transfer_meta(struct sk_buff *skb, const struct sja1105_meta *meta) { @@ -130,7 +348,7 @@ static void sja1105_transfer_meta(struct sk_buff *skb, hdr->h_dest[3] = meta->dmac_byte_3; hdr->h_dest[4] = meta->dmac_byte_4; - SJA1105_SKB_CB(skb)->meta_tstamp = meta->tstamp; + SJA1105_SKB_CB(skb)->tstamp = meta->tstamp; } /* This is a simple state machine which follows the hardware mechanism of @@ -159,37 +377,37 @@ static struct sk_buff bool is_link_local, bool is_meta) { - struct sja1105_port *sp; - struct dsa_port *dp; - - dp = dsa_slave_to_port(skb->dev); - sp = dp->priv; - /* Step 1: A timestampable frame was received. * Buffer it until we get its meta frame. */ if (is_link_local) { - if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_tagger_private *priv; + struct dsa_switch *ds = dp->ds; + + priv = sja1105_tagger_private(ds); + + if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state)) /* Do normal processing. */ return skb; - spin_lock(&sp->data->meta_lock); + spin_lock(&priv->meta_lock); /* Was this a link-local frame instead of the meta * that we were expecting? */ - if (sp->data->stampable_skb) { - dev_err_ratelimited(dp->ds->dev, + if (priv->stampable_skb) { + dev_err_ratelimited(ds->dev, "Expected meta frame, is %12llx " "in the DSA master multicast filter?\n", SJA1105_META_DMAC); - kfree_skb(sp->data->stampable_skb); + kfree_skb(priv->stampable_skb); } /* Hold a reference to avoid dsa_switch_rcv * from freeing the skb. */ - sp->data->stampable_skb = skb_get(skb); - spin_unlock(&sp->data->meta_lock); + priv->stampable_skb = skb_get(skb); + spin_unlock(&priv->meta_lock); /* Tell DSA we got nothing */ return NULL; @@ -201,33 +419,38 @@ static struct sk_buff * frame, which serves no further purpose). */ } else if (is_meta) { + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_tagger_private *priv; + struct dsa_switch *ds = dp->ds; struct sk_buff *stampable_skb; + priv = sja1105_tagger_private(ds); + /* Drop the meta frame if we're not in the right state * to process it. */ - if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) + if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state)) return NULL; - spin_lock(&sp->data->meta_lock); + spin_lock(&priv->meta_lock); - stampable_skb = sp->data->stampable_skb; - sp->data->stampable_skb = NULL; + stampable_skb = priv->stampable_skb; + priv->stampable_skb = NULL; /* Was this a meta frame instead of the link-local * that we were expecting? */ if (!stampable_skb) { - dev_err_ratelimited(dp->ds->dev, + dev_err_ratelimited(ds->dev, "Unexpected meta frame\n"); - spin_unlock(&sp->data->meta_lock); + spin_unlock(&priv->meta_lock); return NULL; } if (stampable_skb->dev != skb->dev) { - dev_err_ratelimited(dp->ds->dev, + dev_err_ratelimited(ds->dev, "Meta frame on wrong port\n"); - spin_unlock(&sp->data->meta_lock); + spin_unlock(&priv->meta_lock); return NULL; } @@ -238,39 +461,88 @@ static struct sk_buff skb = stampable_skb; sja1105_transfer_meta(skb, meta); - spin_unlock(&sp->data->meta_lock); + spin_unlock(&priv->meta_lock); } return skb; } +static bool sja1105_rxtstamp_get_state(struct dsa_switch *ds) +{ + struct sja1105_tagger_private *priv = sja1105_tagger_private(ds); + + return test_bit(SJA1105_HWTS_RX_EN, &priv->state); +} + +static void sja1105_rxtstamp_set_state(struct dsa_switch *ds, bool on) +{ + struct sja1105_tagger_private *priv = sja1105_tagger_private(ds); + + if (on) + set_bit(SJA1105_HWTS_RX_EN, &priv->state); + else + clear_bit(SJA1105_HWTS_RX_EN, &priv->state); + + /* Initialize the meta state machine to a known state */ + if (!priv->stampable_skb) + return; + + kfree_skb(priv->stampable_skb); + priv->stampable_skb = NULL; +} + +static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb) +{ + u16 tpid = ntohs(eth_hdr(skb)->h_proto); + + return tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q || + skb_vlan_tag_present(skb); +} + +static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb) +{ + return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110; +} + +/* If the VLAN in the packet is a tag_8021q one, set @source_port and + * @switch_id and strip the header. Otherwise set @vid and keep it in the + * packet. + */ +static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, + int *switch_id, int *vbid, u16 *vid) +{ + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + u16 vlan_tci; + + if (skb_vlan_tag_present(skb)) + vlan_tci = skb_vlan_tag_get(skb); + else + vlan_tci = ntohs(hdr->h_vlan_TCI); + + if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK)) + return dsa_8021q_rcv(skb, source_port, switch_id, vbid); + + /* Try our best with imprecise RX */ + *vid = vlan_tci & VLAN_VID_MASK; +} + static struct sk_buff *sja1105_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { + int source_port = -1, switch_id = -1, vbid = -1; struct sja1105_meta meta = {0}; - int source_port, switch_id; - struct vlan_ethhdr *hdr; - u16 tpid, vid, tci; + struct ethhdr *hdr; bool is_link_local; - bool is_tagged; bool is_meta; + u16 vid; - hdr = vlan_eth_hdr(skb); - tpid = ntohs(hdr->h_vlan_proto); - is_tagged = (tpid == ETH_P_SJA1105); + hdr = eth_hdr(skb); is_link_local = sja1105_is_link_local(skb); is_meta = sja1105_is_meta_frame(skb); - skb->offload_fwd_mark = 1; - - if (is_tagged) { + if (sja1105_skb_has_tag_8021q(skb)) { /* Normal traffic path. */ - tci = ntohs(hdr->h_vlan_TCI); - vid = tci & VLAN_VID_MASK; - source_port = dsa_8021q_rx_source_port(vid); - switch_id = dsa_8021q_rx_switch_id(vid); - skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); } else if (is_link_local) { /* Management traffic path. Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of @@ -289,32 +561,265 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, return NULL; } - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (vbid >= 1) + skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); + else if (source_port == -1 || switch_id == -1) + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + else + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; } - /* Delete/overwrite fake VLAN header, DSA expects to not find - * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN). - */ - if (is_tagged) - skb = dsa_8021q_remove_header(skb); + if (!is_link_local) + dsa_default_offload_fwd_mark(skb); return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local, is_meta); } -static struct dsa_device_ops sja1105_netdev_ops = { +static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) +{ + u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN; + int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header); + int n_ts = SJA1110_RX_HEADER_N_TS(rx_header); + struct sja1105_tagger_data *tagger_data; + struct net_device *master = skb->dev; + struct dsa_port *cpu_dp; + struct dsa_switch *ds; + int i; + + cpu_dp = master->dsa_ptr; + ds = dsa_switch_find(cpu_dp->dst->index, switch_id); + if (!ds) { + net_err_ratelimited("%s: cannot find switch id %d\n", + master->name, switch_id); + return NULL; + } + + tagger_data = sja1105_tagger_data(ds); + if (!tagger_data->meta_tstamp_handler) + return NULL; + + for (i = 0; i <= n_ts; i++) { + u8 ts_id, source_port, dir; + u64 tstamp; + + ts_id = buf[0]; + source_port = (buf[1] & GENMASK(7, 4)) >> 4; + dir = (buf[1] & BIT(3)) >> 3; + tstamp = be64_to_cpu(*(__be64 *)(buf + 2)); + + tagger_data->meta_tstamp_handler(ds, source_port, ts_id, dir, + tstamp); + + buf += SJA1110_META_TSTAMP_SIZE; + } + + /* Discard the meta frame, we've consumed the timestamps it contained */ + return NULL; +} + +static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, + int *source_port, + int *switch_id, + bool *host_only) +{ + u16 rx_header; + + if (unlikely(!pskb_may_pull(skb, SJA1110_HEADER_LEN))) + return NULL; + + /* skb->data points to skb_mac_header(skb) + ETH_HLEN, which is exactly + * what we need because the caller has checked the EtherType (which is + * located 2 bytes back) and we just need a pointer to the header that + * comes afterwards. + */ + rx_header = ntohs(*(__be16 *)skb->data); + + if (rx_header & SJA1110_RX_HEADER_HOST_ONLY) + *host_only = true; + + if (rx_header & SJA1110_RX_HEADER_IS_METADATA) + return sja1110_rcv_meta(skb, rx_header); + + /* Timestamp frame, we have a trailer */ + if (rx_header & SJA1110_RX_HEADER_HAS_TRAILER) { + int start_of_padding = SJA1110_RX_HEADER_TRAILER_POS(rx_header); + u8 *rx_trailer = skb_tail_pointer(skb) - SJA1110_RX_TRAILER_LEN; + u64 *tstamp = &SJA1105_SKB_CB(skb)->tstamp; + u8 last_byte = rx_trailer[12]; + + /* The timestamp is unaligned, so we need to use packing() + * to get it + */ + packing(rx_trailer, tstamp, 63, 0, 8, UNPACK, 0); + + *source_port = SJA1110_RX_TRAILER_SRC_PORT(last_byte); + *switch_id = SJA1110_RX_TRAILER_SWITCH_ID(last_byte); + + /* skb->len counts from skb->data, while start_of_padding + * counts from the destination MAC address. Right now skb->data + * is still as set by the DSA master, so to trim away the + * padding and trailer we need to account for the fact that + * skb->data points to skb_mac_header(skb) + ETH_HLEN. + */ + pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN); + /* Trap-to-host frame, no timestamp trailer */ + } else { + *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header); + *switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header); + } + + /* Advance skb->data past the DSA header */ + skb_pull_rcsum(skb, SJA1110_HEADER_LEN); + + dsa_strip_etype_header(skb, SJA1110_HEADER_LEN); + + /* With skb->data in its final place, update the MAC header + * so that eth_hdr() continues to works properly. + */ + skb_set_mac_header(skb, -ETH_HLEN); + + return skb; +} + +static struct sk_buff *sja1110_rcv(struct sk_buff *skb, + struct net_device *netdev) +{ + int source_port = -1, switch_id = -1, vbid = -1; + bool host_only = false; + u16 vid = 0; + + if (sja1110_skb_has_inband_control_extension(skb)) { + skb = sja1110_rcv_inband_control_extension(skb, &source_port, + &switch_id, + &host_only); + if (!skb) + return NULL; + } + + /* Packets with in-band control extensions might still have RX VLANs */ + if (likely(sja1105_skb_has_tag_8021q(skb))) + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); + + if (vbid >= 1) + skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); + else if (source_port == -1 || switch_id == -1) + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + else + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (!skb->dev) { + netdev_warn(netdev, "Couldn't decode source port\n"); + return NULL; + } + + if (!host_only) + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static void sja1105_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + /* No tag added for management frames, all ok */ + if (unlikely(sja1105_is_link_local(skb))) + return; + + dsa_tag_generic_flow_dissect(skb, proto, offset); +} + +static void sja1110_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + /* Management frames have 2 DSA tags on RX, so the needed_headroom we + * declared is fine for the generic dissector adjustment procedure. + */ + if (unlikely(sja1105_is_link_local(skb))) + return dsa_tag_generic_flow_dissect(skb, proto, offset); + + /* For the rest, there is a single DSA tag, the tag_8021q one */ + *offset = VLAN_HLEN; + *proto = ((__be16 *)skb->data)[(VLAN_HLEN / 2) - 1]; +} + +static void sja1105_disconnect(struct dsa_switch *ds) +{ + struct sja1105_tagger_private *priv = ds->tagger_data; + + kthread_destroy_worker(priv->xmit_worker); + kfree(priv); + ds->tagger_data = NULL; +} + +static int sja1105_connect(struct dsa_switch *ds) +{ + struct sja1105_tagger_data *tagger_data; + struct sja1105_tagger_private *priv; + struct kthread_worker *xmit_worker; + int err; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + spin_lock_init(&priv->meta_lock); + + xmit_worker = kthread_create_worker(0, "dsa%d:%d_xmit", + ds->dst->index, ds->index); + if (IS_ERR(xmit_worker)) { + err = PTR_ERR(xmit_worker); + kfree(priv); + return err; + } + + priv->xmit_worker = xmit_worker; + /* Export functions for switch driver use */ + tagger_data = &priv->data; + tagger_data->rxtstamp_get_state = sja1105_rxtstamp_get_state; + tagger_data->rxtstamp_set_state = sja1105_rxtstamp_set_state; + ds->tagger_data = priv; + + return 0; +} + +static const struct dsa_device_ops sja1105_netdev_ops = { .name = "sja1105", .proto = DSA_TAG_PROTO_SJA1105, .xmit = sja1105_xmit, .rcv = sja1105_rcv, - .filter = sja1105_filter, - .overhead = VLAN_HLEN, + .connect = sja1105_connect, + .disconnect = sja1105_disconnect, + .needed_headroom = VLAN_HLEN, + .flow_dissect = sja1105_flow_dissect, + .promisc_on_master = true, }; -MODULE_LICENSE("GPL v2"); +DSA_TAG_DRIVER(sja1105_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105); -module_dsa_tag_driver(sja1105_netdev_ops); +static const struct dsa_device_ops sja1110_netdev_ops = { + .name = "sja1110", + .proto = DSA_TAG_PROTO_SJA1110, + .xmit = sja1110_xmit, + .rcv = sja1110_rcv, + .connect = sja1105_connect, + .disconnect = sja1105_disconnect, + .flow_dissect = sja1110_flow_dissect, + .needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN, + .needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN, +}; + +DSA_TAG_DRIVER(sja1110_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1110); + +static struct dsa_tag_driver *sja1105_tag_driver_array[] = { + &DSA_TAG_DRIVER_NAME(sja1105_netdev_ops), + &DSA_TAG_DRIVER_NAME(sja1110_netdev_ops), +}; + +module_dsa_tag_drivers(sja1105_tag_driver_array); + +MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 4f8ab62f0208..5749ba85c2b8 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -13,46 +13,18 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; - int padlen; u8 *trailer; - /* - * We have to make sure that the trailer ends up as the very - * last 4 bytes of the packet. This means that we have to pad - * the packet to the minimum ethernet frame size, if necessary, - * before adding the trailer. - */ - padlen = 0; - if (skb->len < 60) - padlen = 60 - skb->len; - - nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); - if (!nskb) - return NULL; - skb_reserve(nskb, NET_IP_ALIGN); - - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb_network_header(skb) - skb->head); - skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); - skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - consume_skb(skb); - - if (padlen) { - skb_put_zero(nskb, padlen); - } - - trailer = skb_put(nskb, 4); + trailer = skb_put(skb, 4); trailer[0] = 0x80; trailer[1] = 1 << dp->index; trailer[2] = 0x10; trailer[3] = 0x00; - return nskb; + return skb; } -static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *trailer; int source_port; @@ -82,7 +54,7 @@ static const struct dsa_device_ops trailer_netdev_ops = { .proto = DSA_TAG_PROTO_TRAILER, .xmit = trailer_xmit, .rcv = trailer_rcv, - .overhead = 4, + .needed_tailroom = 4, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c new file mode 100644 index 000000000000..ff442b8af636 --- /dev/null +++ b/net/dsa/tag_xrs700x.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * XRS700x tag format handling + * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2020 NovaTech LLC + */ + +#include <linux/bitops.h> + +#include "dsa_priv.h" + +static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *partner, *dp = dsa_slave_to_port(dev); + u8 *trailer; + + trailer = skb_put(skb, 1); + trailer[0] = BIT(dp->index); + + if (dp->hsr_dev) + dsa_hsr_foreach_port(partner, dp->ds, dp->hsr_dev) + if (partner != dp) + trailer[0] |= BIT(partner->index); + + return skb; +} + +static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev) +{ + int source_port; + u8 *trailer; + + trailer = skb_tail_pointer(skb) - 1; + + source_port = ffs((int)trailer[0]) - 1; + + if (source_port < 0) + return NULL; + + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) + return NULL; + + if (pskb_trim_rcsum(skb, skb->len - 1)) + return NULL; + + /* Frame is forwarded by hardware, don't forward in software. */ + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops xrs700x_netdev_ops = { + .name = "xrs700x", + .proto = DSA_TAG_PROTO_XRS700X, + .xmit = xrs700x_xmit, + .rcv = xrs700x_rcv, + .needed_tailroom = 1, +}; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_XRS700X); + +module_dsa_tag_driver(xrs700x_netdev_ops); |