28 files changed, 412 insertions, 230 deletions
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 6b6130b8bdc4..b918c7329426 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -201,8 +201,8 @@ static int c_can_plat_probe(struct platform_device *pdev)
 		priv->instance = pdev->id;
 
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		priv->raminit_ctrlreg = devm_request_and_ioremap(&pdev->dev, res);
-		if (!priv->raminit_ctrlreg || priv->instance < 0)
+		priv->raminit_ctrlreg = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(priv->raminit_ctrlreg) || priv->instance < 0)
 			dev_info(&pdev->dev, "control memory is not used for raminit\n");
 		else
 			priv->raminit = c_can_hw_raminit;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 30e74211a755..ad5272b348f0 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1012,10 +1012,8 @@ static int vortex_init_one(struct pci_dev *pdev,
 		goto out;
 
 	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc < 0) {
-		pci_disable_device(pdev);
-		goto out;
-	}
+	if (rc < 0)
+		goto out_disable;
 
 	unit = vortex_cards_found;
 
@@ -1032,23 +1030,24 @@ static int vortex_init_one(struct pci_dev *pdev,
 	if (!ioaddr) /* If mapping fails, fall-back to BAR 0... */
 		ioaddr = pci_iomap(pdev, 0, 0);
 	if (!ioaddr) {
-		pci_release_regions(pdev);
-		pci_disable_device(pdev);
 		rc = -ENOMEM;
-		goto out;
+		goto out_release;
 	}
 
 	rc = vortex_probe1(&pdev->dev, ioaddr, pdev->irq,
 			   ent->driver_data, unit);
-	if (rc < 0) {
-		pci_iounmap(pdev, ioaddr);
-		pci_release_regions(pdev);
-		pci_disable_device(pdev);
-		goto out;
-	}
+	if (rc < 0)
+		goto out_iounmap;
 
 	vortex_cards_found++;
+	goto out;
+
+out_iounmap:
+	pci_iounmap(pdev, ioaddr);
+out_release:
+	pci_release_regions(pdev);
+out_disable:
+	pci_disable_device(pdev);
 out:
 	return rc;
 }
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index fa4a68fc01cb..c540055e4f4f 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1876,17 +1876,17 @@ fec_probe(struct platform_device *pdev)
 	    (pdev->id_entry->driver_data & FEC_QUIRK_HAS_GBIT))
 		fep->pause_flag |= FEC_PAUSE_FLAG_AUTONEG;
 
-	fep->hwp = devm_request_and_ioremap(&pdev->dev, r);
+	fep->hwp = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(fep->hwp)) {
+		ret = PTR_ERR(fep->hwp);
+		goto failed_ioremap;
+	}
+
 	fep->pdev = pdev;
 	fep->dev_id = dev_id++;
 
 	fep->bufdesc_ex = 0;
 
-	if (!fep->hwp) {
-		ret = -ENOMEM;
-		goto failed_ioremap;
-	}
-
 	platform_set_drvdata(pdev, ndev);
 
 	ret = of_get_phy_mode(pdev->dev.of_node);
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 14f06947e70d..8d2db7b808b7 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -128,6 +128,7 @@ static void gfar_set_multi(struct net_device *dev);
 static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
 static void gfar_configure_serdes(struct net_device *dev);
 static int gfar_poll(struct napi_struct *napi, int budget);
+static int gfar_poll_sq(struct napi_struct *napi, int budget);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gfar_netpoll(struct net_device *dev);
 #endif
@@ -1038,9 +1039,13 @@ static int gfar_probe(struct platform_device *ofdev)
 	dev->ethtool_ops = &gfar_ethtool_ops;
 
 	/* Register for napi ...We are registering NAPI for each grp */
-	for (i = 0; i < priv->num_grps; i++)
-		netif_napi_add(dev, &priv->gfargrp[i].napi, gfar_poll,
-			       GFAR_DEV_WEIGHT);
+	if (priv->mode == SQ_SG_MODE)
+		netif_napi_add(dev, &priv->gfargrp[0].napi, gfar_poll_sq,
+			       GFAR_DEV_WEIGHT);
+	else
+		for (i = 0; i < priv->num_grps; i++)
+			netif_napi_add(dev, &priv->gfargrp[i].napi, gfar_poll,
+				       GFAR_DEV_WEIGHT);
 
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
 		dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@ -2823,6 +2828,48 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 	return howmany;
 }
 
+static int gfar_poll_sq(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi);
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_tx_q *tx_queue = gfargrp->priv->tx_queue[0];
+	struct gfar_priv_rx_q *rx_queue = gfargrp->priv->rx_queue[0];
+	int work_done = 0;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_RTX_MASK);
+
+	/* run Tx cleanup to completion */
+	if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx])
+		gfar_clean_tx_ring(tx_queue);
+
+	work_done = gfar_clean_rx_ring(rx_queue, budget);
+
+	if (work_done < budget) {
+		napi_complete(napi);
+		/* Clear the halt bit in RSTAT */
+		gfar_write(&regs->rstat, gfargrp->rstat);
+
+		gfar_write(&regs->imask, IMASK_DEFAULT);
+
+		/* If we are coalescing interrupts, update the timer
+		 * Otherwise, clear it
+		 */
+		gfar_write(&regs->txic, 0);
+		if (likely(tx_queue->txcoalescing))
+			gfar_write(&regs->txic, tx_queue->txic);
+
+		gfar_write(&regs->rxic, 0);
+		if (unlikely(rx_queue->rxcoalescing))
+			gfar_write(&regs->rxic, rx_queue->rxic);
+	}
+
+	return work_done;
+}
+
 static int gfar_poll(struct napi_struct *napi, int budget)
 {
 	struct gfar_priv_grp *gfargrp =
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 43d8490cfe0d..67a9e962f142 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2486,7 +2486,7 @@ static const u16 *sh_eth_get_register_offset(int register_type)
 	return reg_offset;
 }
 
-static struct net_device_ops sh_eth_netdev_ops = {
+static const struct net_device_ops sh_eth_netdev_ops = {
 	.ndo_open		= sh_eth_open,
 	.ndo_stop		= sh_eth_close,
 	.ndo_start_xmit		= sh_eth_start_xmit,
@@ -2498,6 +2498,21 @@ static struct net_device_ops sh_eth_netdev_ops = {
 	.ndo_change_mtu		= eth_change_mtu,
 };
 
+static const struct net_device_ops sh_eth_netdev_ops_tsu = {
+	.ndo_open		= sh_eth_open,
+	.ndo_stop		= sh_eth_close,
+	.ndo_start_xmit		= sh_eth_start_xmit,
+	.ndo_get_stats		= sh_eth_get_stats,
+	.ndo_set_rx_mode	= sh_eth_set_multicast_list,
+	.ndo_vlan_rx_add_vid	= sh_eth_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= sh_eth_vlan_rx_kill_vid,
+	.ndo_tx_timeout		= sh_eth_tx_timeout,
+	.ndo_do_ioctl		= sh_eth_do_ioctl,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_change_mtu		= eth_change_mtu,
+};
+
 static int sh_eth_drv_probe(struct platform_device *pdev)
 {
 	int ret, devno = 0;
@@ -2568,14 +2583,10 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	sh_eth_set_default_cpu_data(mdp->cd);
 
 	/* set function */
-	if (mdp->cd->tsu) {
-		sh_eth_netdev_ops.ndo_set_rx_mode = sh_eth_set_multicast_list;
-		sh_eth_netdev_ops.ndo_vlan_rx_add_vid = sh_eth_vlan_rx_add_vid;
-		sh_eth_netdev_ops.ndo_vlan_rx_kill_vid =
-			sh_eth_vlan_rx_kill_vid;
-	}
-
-	ndev->netdev_ops = &sh_eth_netdev_ops;
+	if (mdp->cd->tsu)
+		ndev->netdev_ops = &sh_eth_netdev_ops_tsu;
+	else
+		ndev->netdev_ops = &sh_eth_netdev_ops;
 	SET_ETHTOOL_OPS(ndev, &sh_eth_ethtool_ops);
 	ndev->watchdog_timeo = TX_TIMEOUT;
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 39d6bd77f015..9a2914cfd345 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -784,6 +784,7 @@ struct efx_nic {
 	char name[IFNAMSIZ];
 	struct pci_dev *pci_dev;
+	unsigned int port_num;
 	const struct efx_nic_type *type;
 	int legacy_irq;
 	bool legacy_irq_enabled;
@@ -916,7 +917,7 @@ static inline int efx_dev_registered(struct efx_nic *efx)
 
 static inline unsigned int efx_port_num(struct efx_nic *efx)
 {
-	return efx->net_dev->dev_id;
+	return efx->port_num;
 }
 
 /**
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 51669244d154..8c91775e3c5f 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -304,7 +304,7 @@ static int siena_probe_nic(struct efx_nic *efx)
 	}
 
 	efx_reado(efx, &reg, FR_AZ_CS_DEBUG);
-	efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1;
+	efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1;
 
 	efx_mcdi_init(efx);
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 1cd131bde680..fd4dbdae5331 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1159,9 +1159,11 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
 	ndev->irq = res->start;
 
 	res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
-	lp->base_addr = devm_request_and_ioremap(&ofdev->dev, res);
-	if (!lp->base_addr)
+	lp->base_addr = devm_ioremap_resource(&ofdev->dev, res);
+	if (IS_ERR(lp->base_addr)) {
+		rc = PTR_ERR(lp->base_addr);
 		goto error;
+	}
 
 	ndev->mem_start = res->start;
 	ndev->mem_end = res->end;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 04e9adb748c5..e46fef38685b 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -525,31 +525,26 @@ static void team_set_no_mode(struct team *team)
 	team->mode = &__team_no_mode;
 }
 
-static void __team_adjust_ops(struct team *team, int en_port_count)
+static void team_adjust_ops(struct team *team)
 {
 	/*
 	 * To avoid checks in rx/tx skb paths, ensure here that non-null and
 	 * correct ops are always set.
	 */
 
-	if (!en_port_count || !team_is_mode_set(team) ||
+	if (!team->en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->transmit)
 		team->ops.transmit = team_dummy_transmit;
 	else
 		team->ops.transmit = team->mode->ops->transmit;
 
-	if (!en_port_count || !team_is_mode_set(team) ||
+	if (!team->en_port_count || !team_is_mode_set(team) ||
 	    !team->mode->ops->receive)
 		team->ops.receive = team_dummy_receive;
 	else
 		team->ops.receive = team->mode->ops->receive;
 }
 
-static void team_adjust_ops(struct team *team)
-{
-	__team_adjust_ops(team, team->en_port_count);
-}
-
 /*
  * We can benefit from the fact that it's ensured no port is present
  * at the time of mode change. Therefore no packets are in fly so there's no
@@ -725,9 +720,9 @@ static bool team_queue_override_transmit(struct team *team, struct sk_buff *skb)
 static void __team_queue_override_port_del(struct team *team,
 					   struct team_port *port)
 {
+	if (!port->queue_id)
+		return;
 	list_del_rcu(&port->qom_list);
-	synchronize_rcu();
-	INIT_LIST_HEAD(&port->qom_list);
 }
 
 static bool team_queue_override_port_has_gt_prio_than(struct team_port *port,
@@ -749,9 +744,8 @@ static void __team_queue_override_port_add(struct team *team,
 	struct list_head *qom_list;
 	struct list_head *node;
 
-	if (!port->queue_id || !team_port_enabled(port))
+	if (!port->queue_id)
 		return;
-
 	qom_list = __team_get_qom_list(team, port->queue_id);
 	node = qom_list;
 	list_for_each_entry(cur, qom_list, qom_list) {
@@ -768,7 +762,7 @@ static void __team_queue_override_enabled_check(struct team *team)
 	bool enabled = false;
 
 	list_for_each_entry(port, &team->port_list, list) {
-		if (!list_empty(&port->qom_list)) {
+		if (port->queue_id) {
 			enabled = true;
 			break;
 		}
@@ -780,14 +774,44 @@ static void __team_queue_override_enabled_check(struct team *team)
 	team->queue_override_enabled = enabled;
 }
 
-static void team_queue_override_port_refresh(struct team *team,
-					     struct team_port *port)
+static void team_queue_override_port_prio_changed(struct team *team,
+						  struct team_port *port)
 {
+	if (!port->queue_id || team_port_enabled(port))
+		return;
 	__team_queue_override_port_del(team, port);
 	__team_queue_override_port_add(team, port);
 	__team_queue_override_enabled_check(team);
 }
 
+static void team_queue_override_port_change_queue_id(struct team *team,
+						      struct team_port *port,
+						      u16 new_queue_id)
+{
+	if (team_port_enabled(port)) {
+		__team_queue_override_port_del(team, port);
+		port->queue_id = new_queue_id;
+		__team_queue_override_port_add(team, port);
+		__team_queue_override_enabled_check(team);
+	} else {
+		port->queue_id = new_queue_id;
+	}
+}
+
+static void team_queue_override_port_add(struct team *team,
+					 struct team_port *port)
+{
+	__team_queue_override_port_add(team, port);
+	__team_queue_override_enabled_check(team);
+}
+
+static void team_queue_override_port_del(struct team *team,
+					 struct team_port *port)
+{
+	__team_queue_override_port_del(team, port);
+	__team_queue_override_enabled_check(team);
+}
+
 
 /****************
  * Port handling
@@ -819,7 +843,7 @@ static void team_port_enable(struct team *team,
 	hlist_add_head_rcu(&port->hlist,
 			   team_port_index_hash(team, port->index));
 	team_adjust_ops(team);
-	team_queue_override_port_refresh(team, port);
+	team_queue_override_port_add(team, port);
 	if (team->ops.port_enabled)
 		team->ops.port_enabled(team, port);
 }
@@ -848,14 +872,9 @@ static void team_port_disable(struct team *team,
 	hlist_del_rcu(&port->hlist);
 	__reconstruct_port_hlist(team, port->index);
 	port->index = -1;
-	team_queue_override_port_refresh(team, port);
-	__team_adjust_ops(team, team->en_port_count - 1);
-	/*
-	 * Wait until readers see adjusted ops. This ensures that
-	 * readers never see team->en_port_count == 0
-	 */
-	synchronize_rcu();
 	team->en_port_count--;
+	team_queue_override_port_del(team, port);
+	team_adjust_ops(team);
 }
 
 #define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
@@ -1163,8 +1182,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
 	team_port_set_orig_dev_addr(port);
 	dev_set_mtu(port_dev, port->orig.mtu);
-	synchronize_rcu();
-	kfree(port);
+	kfree_rcu(port, rcu);
 	netdev_info(dev, "Port device %s removed\n", portname);
 	__team_compute_features(team);
 
@@ -1259,9 +1277,12 @@ static int team_priority_option_set(struct team *team,
 				    struct team_gsetter_ctx *ctx)
 {
 	struct team_port *port = ctx->info->port;
+	s32 priority = ctx->data.s32_val;
 
-	port->priority = ctx->data.s32_val;
-	team_queue_override_port_refresh(team, port);
+	if (port->priority == priority)
+		return 0;
+	port->priority = priority;
+	team_queue_override_port_prio_changed(team, port);
 	return 0;
 }
 
@@ -1278,17 +1299,16 @@ static int team_queue_id_option_set(struct team *team,
 				    struct team_gsetter_ctx *ctx)
 {
 	struct team_port *port = ctx->info->port;
+	u16 new_queue_id = ctx->data.u32_val;
 
-	if (port->queue_id == ctx->data.u32_val)
+	if (port->queue_id == new_queue_id)
 		return 0;
-	if (ctx->data.u32_val >= team->dev->real_num_tx_queues)
+	if (new_queue_id >= team->dev->real_num_tx_queues)
 		return -EINVAL;
-	port->queue_id = ctx->data.u32_val;
-	team_queue_override_port_refresh(team, port);
+	team_queue_override_port_change_queue_id(team, port, new_queue_id);
 	return 0;
 }
 
-
 static const struct team_option team_options[] = {
 	{
 		.name = "mode",
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index cdc31b5ea15e..829a9cd2b4da 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -112,9 +112,8 @@ static struct team_port *lb_hash_select_tx_port(struct team *team,
 						struct sk_buff *skb,
 						unsigned char hash)
 {
-	int port_index;
+	int port_index = team_num_to_port_index(team, hash);
 
-	port_index = hash % team->en_port_count;
 	return team_get_port_by_index_rcu(team, port_index);
 }
 
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
index d268e4de781b..90311c7375fb 100644
--- a/drivers/net/team/team_mode_roundrobin.c
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -30,7 +30,8 @@ static bool rr_transmit(struct team *team, struct sk_buff *skb)
 	struct team_port *port;
 	int port_index;
 
-	port_index = rr_priv(team)->sent_packets++ % team->en_port_count;
+	port_index = team_num_to_port_index(team,
+					    rr_priv(team)->sent_packets++);
 	port = team_get_port_by_index_rcu(team, port_index);
 	port = team_get_first_port_txable_rcu(team, port);
 	if (unlikely(!port))
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 177f911f5946..da866523cf20 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -379,12 +379,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 	else
 		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
 
-	if (strchr(dev->name, '%')) {
-		err = dev_alloc_name(dev, dev->name);
-		if (err < 0)
-			goto err_alloc_name;
-	}
-
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto err_register_dev;
@@ -404,7 +398,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 
 err_register_dev:
 	/* nothing to do */
-err_alloc_name:
 err_configure_peer:
 	unregister_netdevice(peer);
 	return err;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 4474557904f6..b662045a81c0 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -69,6 +69,7 @@ struct team_port {
 	s32 priority; /* lower number ~ higher priority */
 	u16 queue_id;
 	struct list_head qom_list; /* node in queue override mapping list */
+	struct rcu_head rcu;
 	long mode_priv[0];
 };
 
@@ -228,6 +229,16 @@ static inline struct team_port *team_get_port_by_index(struct team *team,
 			return port;
 	return NULL;
 }
+
+static inline int team_num_to_port_index(struct team *team, int num)
+{
+	int en_port_count = ACCESS_ONCE(team->en_port_count);
+
+	if (unlikely(!en_port_count))
+		return 0;
+	return num % en_port_count;
+}
+
 static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
 							    int port_index)
 {
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7f2bf1518480..e3362b5f13e8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -84,6 +84,7 @@ struct ip_mc_list {
 		struct ip_mc_list *next;
 		struct ip_mc_list __rcu *next_rcu;
 	};
+	struct ip_mc_list __rcu *next_hash;
 	struct timer_list	timer;
 	int			users;
 	atomic_t		refcnt;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ea1e3b863890..b99cd23f3474 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -50,12 +50,17 @@ struct ipv4_devconf {
 	DECLARE_BITMAP(state, IPV4_DEVCONF_MAX);
 };
 
+#define MC_HASH_SZ_LOG 9
+
 struct in_device {
 	struct net_device	*dev;
 	atomic_t		refcnt;
 	int			dead;
 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/
+
 	struct ip_mc_list __rcu	*mc_list;	/* IP multicast filter chain    */
+	struct ip_mc_list __rcu	* __rcu *mc_hash;
+
 	int			mc_count;	/* Number of installed mcasts	*/
 	spinlock_t		mc_tomb_lock;
 	struct ip_mc_list	*mc_tomb;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2ecb96d9a1e5..e5d65573b4d6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1174,8 +1174,10 @@ struct net_device {
 	unsigned char		addr_assign_type; /* hw address assignment type */
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned char		neigh_priv_len;
-	unsigned short          dev_id;		/* for shared network cards */
-
+	unsigned short          dev_id;		/* Used to differentiate devices
+						 * that share the same link
+						 * layer address
+						 */
 	spinlock_t		addr_list_lock;
 	struct netdev_hw_addr_list	uc;	/* Unicast mac addresses */
 	struct netdev_hw_addr_list	mc;	/* Multicast mac addresses */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 7235ae73a1e8..b21a7f06d6a4 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -32,7 +32,6 @@
  *
  * @faddr - Saved first hop address
  * @nexthop - Saved nexthop address in LSRR and SSRR
- * @is_data - Options in __data, rather than skb
  * @is_strictroute - Strict source route
  * @srr_is_hit - Packet destination addr was our one
  * @is_changed - IP checksum more not valid
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index df5676029827..6eab63363e59 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -680,7 +680,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
 #endif
 
 struct psched_ratecfg {
-	u64	rate_bps;
+	u64	rate_bytes_ps; /* bytes per second */
 	u32	mult;
 	u16	overhead;
 	u8	shift;
@@ -698,7 +698,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
 					  const struct psched_ratecfg *r)
 {
 	memset(res, 0, sizeof(*res));
-	res->rate = r->rate_bps >> 3;
+	res->rate = r->rate_bytes_ps;
 	res->overhead = r->overhead;
 }
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 065f379c6503..b30a71a51839 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -187,6 +187,8 @@ extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int	udp_disconnect(struct sock *sk, int flags);
 extern unsigned int udp_poll(struct file *file, struct socket *sock,
 			     poll_table *wait);
+extern struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+					       netdev_features_t features);
 extern int	udp_lib_getsockopt(struct sock *sk, int level, int optname,
 				   char __user *optval, int __user *optlen);
 extern int	udp_lib_setsockopt(struct sock *sk, int level, int optname,
@@ -262,11 +264,10 @@ extern int udp4_proc_init(void);
 extern void udp4_proc_exit(void);
 #endif
 
+extern int udpv4_offload_init(void);
+
 extern void udp_init(void);
 
-extern int udp4_ufo_send_check(struct sk_buff *skb);
-extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
-					 netdev_features_t features);
 extern void udp_encap_enable(void);
 #if IS_ENABLED(CONFIG_IPV6)
 extern void udpv6_encap_enable(void);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 303412d8332b..9640972ec50e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2627,6 +2627,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
 	pgh->tv_usec = htonl(timestamp.tv_usec);
 }
 
+static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
+					struct pktgen_dev *pkt_dev,
+					unsigned int extralen)
+{
+	struct sk_buff *skb = NULL;
+	unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen +
+			    pkt_dev->pkt_overhead;
+
+	if (pkt_dev->flags & F_NODE) {
+		int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
+
+		skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node);
+		if (likely(skb)) {
+			skb_reserve(skb, NET_SKB_PAD);
+			skb->dev = dev;
+		}
+	} else {
+		 skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
+	}
+
+	return skb;
+}
+
 static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 					struct pktgen_dev *pkt_dev)
 {
@@ -2657,32 +2680,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
 
-	if (pkt_dev->flags & F_NODE) {
-		int node;
-
-		if (pkt_dev->node >= 0)
-			node = pkt_dev->node;
-		else
-			node = numa_node_id();
-
-		skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
-				  + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
-		if (likely(skb)) {
-			skb_reserve(skb, NET_SKB_PAD);
-			skb->dev = odev;
-		}
-	}
-	else
-		skb = __netdev_alloc_skb(odev,
-					 pkt_dev->cur_pkt_size + 64
-					 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
-
+	skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
-	prefetchw(skb->data);
 
+	prefetchw(skb->data);
 	skb_reserve(skb, datalen);
 
 	/* Reserve for ethernet and IP header */
@@ -2786,15 +2790,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	mod_cur_headers(pkt_dev);
 	queue_map = pkt_dev->cur_queue_map;
 
-	skb = __netdev_alloc_skb(odev,
-				 pkt_dev->cur_pkt_size + 64
-				 + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT);
+	skb = pktgen_alloc_skb(odev, pkt_dev, 16);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
-	prefetchw(skb->data);
 
+	prefetchw(skb->data);
 	skb_reserve(skb, 16);
 
 	/* Reserve for ethernet and IP header */
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4d3e138c564f..7fcf8101d85f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -9,7 +9,7 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
 	     tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
 	     tcp_offload.o datagram.o raw.o udp.o udplite.o \
-	     arp.o icmp.o devinet.o af_inet.o igmp.o \
+	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o \
 	     inet_fragment.o ping.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7b514290efc6..5598b06d62db 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1566,13 +1566,6 @@ static const struct net_protocol udp_protocol = {
 	.netns_ok =	1,
 };
 
-static const struct net_offload udp_offload = {
-	.callbacks = {
-		.gso_send_check = udp4_ufo_send_check,
-		.gso_segment = udp4_ufo_fragment,
-	},
-};
-
 static const struct net_protocol icmp_protocol = {
 	.handler =	icmp_rcv,
 	.err_handler =	icmp_err,
@@ -1672,7 +1665,7 @@ static int __init ipv4_offload_init(void)
 	/*
 	 * Add offloads
 	 */
-	if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0)
+	if (udpv4_offload_init() < 0)
 		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
 	if (tcpv4_offload_init() < 0)
 		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b047e2d8a614..3469506c106d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev)
 	WARN_ON(idev->ifa_list);
 	WARN_ON(idev->mc_list);
+	kfree(rcu_dereference_protected(idev->mc_hash, 1));
 #ifdef NET_REFCNT_DEBUG
 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 #endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 450f625361e4..cd71190d2962 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im)
  *	Multicast list managers
  */
 
+static u32 ip_mc_hash(const struct ip_mc_list *im)
+{
+	return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG);
+}
+
+static void ip_mc_hash_add(struct in_device *in_dev,
+			   struct ip_mc_list *im)
+{
+	struct ip_mc_list __rcu **mc_hash;
+	u32 hash;
+
+	mc_hash = rtnl_dereference(in_dev->mc_hash);
+	if (mc_hash) {
+		hash = ip_mc_hash(im);
+		im->next_hash = mc_hash[hash];
+		rcu_assign_pointer(mc_hash[hash], im);
+		return;
+	}
+
+	/* do not use a hash table for small number of items */
+	if (in_dev->mc_count < 4)
+		return;
+
+	mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG,
+			  GFP_KERNEL);
+	if (!mc_hash)
+		return;
+
+	for_each_pmc_rtnl(in_dev, im) {
+		hash = ip_mc_hash(im);
+		im->next_hash = mc_hash[hash];
+		RCU_INIT_POINTER(mc_hash[hash], im);
+	}
+
+	rcu_assign_pointer(in_dev->mc_hash, mc_hash);
+}
+
+static void ip_mc_hash_remove(struct in_device *in_dev,
+			      struct ip_mc_list *im)
+{
+	struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash);
+	struct ip_mc_list *aux;
+
+	if (!mc_hash)
+		return;
+	mc_hash += ip_mc_hash(im);
+	while ((aux = rtnl_dereference(*mc_hash)) != im)
+		mc_hash = &aux->next_hash;
+	*mc_hash = im->next_hash;
+}
+
+
 /*
  *	A socket has joined a multicast group on device dev.
  */
@@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	in_dev->mc_count++;
 	rcu_assign_pointer(in_dev->mc_list, im);
 
+	ip_mc_hash_add(in_dev, im);
+
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im->multiaddr);
 #endif
@@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 	     ip = &i->next_rcu) {
 		if (i->multiaddr == addr) {
 			if (--i->users == 0) {
+				ip_mc_hash_remove(in_dev, i);
 				*ip = i->next_rcu;
 				in_dev->mc_count--;
 				igmp_group_dropped(i);
@@ -1381,13 +1435,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
 	ASSERT_RTNL();
 
-	in_dev->mc_tomb = NULL;
 #ifdef CONFIG_IP_MULTICAST
-	in_dev->mr_gq_running = 0;
 	setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
 			(unsigned long)in_dev);
-	in_dev->mr_ifc_count = 0;
-	in_dev->mc_count = 0;
 	setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
 			(unsigned long)in_dev);
 	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
@@ -2321,12 +2371,25 @@ void ip_mc_drop_socket(struct sock *sk)
 int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
 {
 	struct ip_mc_list *im;
+	struct ip_mc_list __rcu **mc_hash;
 	struct ip_sf_list *psf;
 	int rv = 0;
 
-	for_each_pmc_rcu(in_dev, im) {
-		if (im->multiaddr == mc_addr)
-			break;
+	mc_hash = rcu_dereference(in_dev->mc_hash);
+	if (mc_hash) {
+		u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG);
+
+		for (im = rcu_dereference(mc_hash[hash]);
+		     im != NULL;
+		     im = rcu_dereference(im->next_hash)) {
+			if (im->multiaddr == mc_addr)
+				break;
+		}
+	} else {
+		for_each_pmc_rcu(in_dev, im) {
+			if (im->multiaddr == mc_addr)
+				break;
+		}
 	}
 	if (im && proto == IPPROTO_IGMP) {
 		rv = 1;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2955b25aee6d..f65bc32c0266 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2290,29 +2290,8 @@ void __init udp_init(void)
 	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
 }
 
-int udp4_ufo_send_check(struct sk_buff *skb)
-{
-	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		return -EINVAL;
-
-	if (likely(!skb->encapsulation)) {
-		const struct iphdr *iph;
-		struct udphdr *uh;
-
-		iph = ip_hdr(skb);
-		uh = udp_hdr(skb);
-
-		uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-			IPPROTO_UDP, 0);
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		skb->ip_summed = CHECKSUM_PARTIAL;
-	}
-	return 0;
-}
-
-static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
-					      netdev_features_t features)
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+				       netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	int mac_len = skb->mac_len;
@@ -2371,53 +2350,3 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 out:
 	return segs;
 }
-
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
-				  netdev_features_t features)
-{
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	unsigned int mss;
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-		int type = skb_shinfo(skb)->gso_type;
-
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
-				      SKB_GSO_UDP_TUNNEL |
-				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
-			     !(type & (SKB_GSO_UDP))))
-			goto out;
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		segs = NULL;
-		goto out;
-	}
-
-	/* Fragment the skb. IP headers of the fragments are updated in
-	 * inet_gso_segment()
-	 */
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
-		segs = skb_udp_tunnel_segment(skb, features);
-	else {
-		int offset;
-		__wsum csum;
-
-		/* Do software UFO. Complete and fill in the UDP checksum as
-		 * HW cannot do checksum of UDP packets sent as multiple
-		 * IP fragments.
-		 */
-		offset = skb_checksum_start_offset(skb);
-		csum = skb_checksum(skb, offset, skb->len - offset, 0);
-		offset += skb->csum_offset;
-		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
-		skb->ip_summed = CHECKSUM_NONE;
-
-		segs = skb_segment(skb, features);
-	}
-out:
-	return segs;
-}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
new file mode 100644
index 000000000000..f35eccaa855e
--- /dev/null
+++ b/net/ipv4/udp_offload.c
@@ -0,0 +1,100 @@
+/*
+ *	IPV4 GSO/GRO offload support
+ *	Linux INET implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	UDPv4 GSO support
+ */
+
+#include <linux/skbuff.h>
+#include <net/udp.h>
+#include <net/protocol.h>
+
+static int udp4_ufo_send_check(struct sk_buff *skb)
+{
+	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+		return -EINVAL;
+
+	if (likely(!skb->encapsulation)) {
+		const struct iphdr *iph;
+		struct udphdr *uh;
+
+		iph = ip_hdr(skb);
+		uh = udp_hdr(skb);
+
+		uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+				IPPROTO_UDP, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
+
+	return 0;
+}
+
+static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+					 netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	unsigned int mss;
+
+	mss = skb_shinfo(skb)->gso_size;
+	if (unlikely(skb->len <= mss))
+		goto out;
+
+	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+		/* Packet is from an untrusted source, reset gso_segs. */
+		int type = skb_shinfo(skb)->gso_type;
+
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
+			     !(type & (SKB_GSO_UDP))))
+			goto out;
+
+		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+		segs = NULL;
+		goto out;
+	}
+
+	/* Fragment the skb. IP headers of the fragments are updated in
+	 * inet_gso_segment()
+	 */
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+		segs = skb_udp_tunnel_segment(skb, features);
+	else {
+		int offset;
+		__wsum csum;
+
+		/* Do software UFO. Complete and fill in the UDP checksum as
+		 * HW cannot do checksum of UDP packets sent as multiple
+		 * IP fragments.
+		 */
+		offset = skb_checksum_start_offset(skb);
+		csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		offset += skb->csum_offset;
+		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+
+		segs = skb_segment(skb, features);
+	}
+out:
+	return segs;
+}
+
+static const struct net_offload udpv4_offload = {
+	.callbacks = {
+		.gso_send_check = udp4_ufo_send_check,
+		.gso_segment = udp4_ufo_fragment,
+	},
+};
+
+int __init udpv4_offload_init(void)
+{
+	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
+}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 20224086cc28..4626cef4b76e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -901,37 +901,33 @@ void dev_shutdown(struct net_device *dev)
 void psched_ratecfg_precompute(struct psched_ratecfg *r,
 			       const struct tc_ratespec *conf)
 {
-	u64 factor;
-	u64 mult;
-	int shift;
-
 	memset(r, 0, sizeof(*r));
 	r->overhead = conf->overhead;
-	r->rate_bps = (u64)conf->rate << 3;
+	r->rate_bytes_ps = conf->rate;
 	r->mult = 1;
 	/*
-	 * Calibrate mult, shift so that token counting is accurate
-	 * for smallest packet size (64 bytes).  Token (time in ns) is
-	 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps.  It will
-	 * work as long as the smallest packet transfer time can be
-	 * accurately represented in nanosec.
+	 * The deal here is to replace a divide by a reciprocal one
+	 * in fast path (a reciprocal divide is a multiply and a shift)
+	 *
+	 * Normal formula would be :
+	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+	 *
+	 * We compute mult/shift to use instead :
+	 *  time_in_ns = (len * mult) >> shift;
+	 *
+	 * We try to get the highest possible mult value for accuracy,
+	 * but have to make sure no overflows will ever happen.
 	 */
-	if (r->rate_bps > 0) {
-		/*
-		 * Higher shift gives better accuracy.  Find the largest
-		 * shift such that mult fits in 32 bits.
-		 */
-		for (shift = 0; shift < 16; shift++) {
-			r->shift = shift;
-			factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
-			mult = div64_u64(factor, r->rate_bps);
-			if (mult > UINT_MAX)
+	if (r->rate_bytes_ps > 0) {
+		u64 factor = NSEC_PER_SEC;
+
+		for (;;) {
+			r->mult = div64_u64(factor, r->rate_bytes_ps);
+			if (r->mult & (1U << 31) || factor & (1ULL << 63))
 				break;
+			factor <<= 1;
+			r->shift++;
 		}
-
-		r->shift = shift - 1;
-		factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
-		r->mult = div64_u64(factor, r->rate_bps);
 	}
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 162fb800754c..1a3655a606c1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -65,6 +65,10 @@ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis f
 module_param    (htb_hysteresis, int, 0640);
 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
 
+static int htb_rate_est = 0; /* htb classes have a default rate estimator */
+module_param(htb_rate_est, int, 0640);
+MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
+
 /* used internaly to keep status of single class */
 enum htb_cmode {
 	HTB_CANT_SEND,		/* class can't send and can't borrow */
@@ -1366,12 +1370,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (!cl)
 			goto failure;
 
-		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
-					qdisc_root_sleeping_lock(sch),
-					tca[TCA_RATE] ? : &est.nla);
-		if (err) {
-			kfree(cl);
-			goto failure;
+		if (htb_rate_est || tca[TCA_RATE]) {
+			err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+						qdisc_root_sleeping_lock(sch),
+						tca[TCA_RATE] ? : &est.nla);
+			if (err) {
+				kfree(cl);
+				goto failure;
+			}
 		}
 
 		cl->refcnt = 1;
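
Note on the net/sched/sch_generic.c hunk: the rewritten psched_ratecfg_precompute() trades the per-packet 64-bit divide for a precomputed multiply-and-shift. The following standalone C sketch is illustrative only, not kernel code: div64_u64() is replaced by a plain 64-bit division, and the struct and function names (ratecfg, precompute, len_to_ns) are simplified stand-ins. It shows how the chosen mult/shift pair reproduces len * NSEC_PER_SEC / rate_bytes_ps.

/* Userspace sketch of the reciprocal-divide precomputation. */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct ratecfg {
	uint64_t rate_bytes_ps;
	uint32_t mult;
	uint8_t  shift;
};

static void precompute(struct ratecfg *r, uint64_t rate_bytes_ps)
{
	uint64_t factor = NSEC_PER_SEC;

	r->rate_bytes_ps = rate_bytes_ps;
	r->mult = 1;
	r->shift = 0;
	if (!rate_bytes_ps)
		return;
	/* Grow factor (and shift) until mult would no longer fit safely. */
	for (;;) {
		r->mult = (uint32_t)(factor / rate_bytes_ps);
		if ((r->mult & (1U << 31)) || (factor & (1ULL << 63)))
			break;
		factor <<= 1;
		r->shift++;
	}
}

static uint64_t len_to_ns(const struct ratecfg *r, unsigned int len)
{
	/* fast path: one multiply and one shift, no divide */
	return ((uint64_t)len * r->mult) >> r->shift;
}

int main(void)
{
	struct ratecfg r;

	/* 125000000 bytes/s == 1 Gbit/s */
	precompute(&r, 125000000);
	printf("mult=%u shift=%u\n", r.mult, r.shift);
	/* exact value: 1500 * 1e9 / 125e6 = 12000 ns */
	printf("1500B -> %llu ns (exact 12000)\n",
	       (unsigned long long)len_to_ns(&r, 1500));
	return 0;
}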
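Similarly, the team driver hunks drop synchronize_rcu() from the port disable and delete paths, so a transmitter running under rcu_read_lock() can now briefly observe en_port_count == 0. The new team_num_to_port_index() helper in if_team.h guards the modulo for that case by reading the count exactly once. The sketch below is a userspace illustration with hypothetical names, using C11 atomics where the patch uses ACCESS_ONCE(); it is not the kernel implementation.

/* Userspace sketch of the "read once, check for zero" pattern. */
#include <stdatomic.h>
#include <stdio.h>

struct team_sketch {
	_Atomic int en_port_count;	/* updated by the control path */
};

static int num_to_port_index(struct team_sketch *team, int num)
{
	/* single load, like ACCESS_ONCE() in the patch */
	int count = atomic_load_explicit(&team->en_port_count,
					 memory_order_relaxed);

	if (count == 0)		/* the last port may have just been disabled */
		return 0;
	return num % count;	/* never divides by zero, even mid-update */
}

int main(void)
{
	struct team_sketch team;

	atomic_init(&team.en_port_count, 3);
	printf("packet 7 -> port %d\n", num_to_port_index(&team, 7));	/* 1 */

	atomic_store(&team.en_port_count, 0);	/* all ports disabled */
	printf("packet 7 -> port %d\n", num_to_port_index(&team, 7));	/* 0 */
	return 0;
}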