diff options
Diffstat (limited to 'drivers/net/ethernet/google/gve')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve.h | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.c | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_desc.h | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_dqo.h | 24 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ethtool.c | 102 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 138 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx_dqo.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c | 73 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx_dqo.c | 6 |
10 files changed, 311 insertions, 97 deletions
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index b719f72281c4..160735484465 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -229,6 +229,7 @@ struct gve_rx_ring { /* A TX desc ring entry */ union gve_tx_desc { struct gve_tx_pkt_desc pkt; /* first desc for a packet */ + struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */ struct gve_tx_seg_desc seg; /* subsequent descs for a packet */ }; @@ -441,13 +442,13 @@ struct gve_tx_ring { * associated with that irq. */ struct gve_notify_block { - __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */ + __be32 *irq_db_index; /* pointer to idx into Bar2 */ char name[IFNAMSIZ + 16]; /* name registered with the kernel */ struct napi_struct napi; /* kernel napi struct for this block */ struct gve_priv *priv; struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ -} ____cacheline_aligned; +}; /* Tracks allowed and current queue settings */ struct gve_queue_config { @@ -466,6 +467,10 @@ struct gve_options_dqo_rda { u16 rx_buff_ring_entries; /* number of rx_buff descriptors */ }; +struct gve_irq_db { + __be32 index; +} ____cacheline_aligned; + struct gve_ptype { u8 l3_type; /* `gve_l3_type` in gve_adminq.h */ u8 l4_type; /* `gve_l4_type` in gve_adminq.h */ @@ -492,7 +497,8 @@ struct gve_priv { struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ struct gve_queue_page_list *qpls; /* array of num qpls */ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ - dma_addr_t ntfy_block_bus; + struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */ + dma_addr_t irq_db_indices_bus; struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */ char mgmt_msix_name[IFNAMSIZ + 16]; u32 mgmt_msix_idx; @@ -551,6 +557,8 @@ struct gve_priv { u32 page_alloc_fail; /* count of page alloc fails */ u32 dma_mapping_error; /* count of dma mapping errors */ u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */ + u32 suspend_cnt; /* count of times suspended */ + u32 resume_cnt; /* count of times resumed */ struct workqueue_struct *gve_wq; struct work_struct service_task; struct work_struct stats_report_task; @@ -567,6 +575,7 @@ struct gve_priv { /* Gvnic device link speed from hypervisor. */ u64 link_speed; + bool up_before_suspend; /* True if dev was up before suspend */ struct gve_options_dqo_rda options_dqo_rda; struct gve_ptype_lut *ptype_lut_dqo; @@ -575,6 +584,10 @@ struct gve_priv { int data_buffer_size_dqo; enum gve_queue_format queue_format; + + /* Interrupt coalescing settings */ + u32 tx_coalesce_usecs; + u32 rx_coalesce_usecs; }; enum gve_service_task_flags_bit { @@ -733,7 +746,7 @@ static inline void gve_clear_report_stats(struct gve_priv *priv) static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv, struct gve_notify_block *block) { - return &priv->db_bar2[be32_to_cpu(block->irq_db_index)]; + return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)]; } /* Returns the index into ntfy_blocks of the given tx ring's block @@ -830,7 +843,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv) /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction); + enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); /* tx handling */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 83ae56c310d3..f7621ab672b9 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -301,7 +301,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) */ static int gve_adminq_kick_and_wait(struct gve_priv *priv) { - u32 tail, head; + int tail, head; int i; tail = ioread32be(&priv->reg_bar0->adminq_event_counter); @@ -462,7 +462,7 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, .num_counters = cpu_to_be32(num_counters), .irq_db_addr = cpu_to_be64(db_array_bus_addr), .num_irq_dbs = cpu_to_be32(num_ntfy_blks), - .irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])), + .irq_db_stride = cpu_to_be32(sizeof(*priv->irq_db_indices)), .ntfy_blk_msix_base_idx = cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX), .queue_format = priv->queue_format, @@ -738,10 +738,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) * is not set to GqiRda, choose the queue format in a priority order: * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default. */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) { - dev_info(&priv->pdev->dev, - "Driver is running with GQI RDA queue format.\n"); - } else if (dev_op_dqo_rda) { + if (dev_op_dqo_rda) { priv->queue_format = GVE_DQO_RDA_FORMAT; dev_info(&priv->pdev->dev, "Driver is running with DQO RDA queue format.\n"); @@ -753,6 +750,9 @@ int gve_adminq_describe_device(struct gve_priv *priv) "Driver is running with GQI RDA queue format.\n"); supported_features_mask = be32_to_cpu(dev_op_gqi_rda->supported_features_mask); + } else if (priv->queue_format == GVE_GQI_RDA_FORMAT) { + dev_info(&priv->pdev->dev, + "Driver is running with GQI RDA queue format.\n"); } else { priv->queue_format = GVE_GQI_QPL_FORMAT; if (dev_op_gqi_qpl) diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h index 4d225a18d8ce..f4ae9e19b844 100644 --- a/drivers/net/ethernet/google/gve/gve_desc.h +++ b/drivers/net/ethernet/google/gve/gve_desc.h @@ -33,6 +33,14 @@ struct gve_tx_pkt_desc { __be64 seg_addr; /* Base address (see note) of this segment */ } __packed; +struct gve_tx_mtd_desc { + u8 type_flags; /* type is lower 4 bits, subtype upper */ + u8 path_state; /* state is lower 4 bits, hash type upper */ + __be16 reserved0; + __be32 path_hash; + __be64 reserved1; +} __packed; + struct gve_tx_seg_desc { u8 type_flags; /* type is lower 4 bits, flags upper */ u8 l3_offset; /* TSO: 2 byte units to start of IPH */ @@ -46,6 +54,7 @@ struct gve_tx_seg_desc { #define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */ #define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */ #define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */ +#define GVE_TXD_MTD (0x3 << 4) /* Metadata */ /* GVE Transmit Descriptor Flags for Std Pkts */ #define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */ @@ -54,6 +63,17 @@ struct gve_tx_seg_desc { /* GVE Transmit Descriptor Flags for TSO Segs */ #define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */ +/* GVE Transmit Descriptor Options for MTD Segs */ +#define GVE_MTD_SUBTYPE_PATH 0 + +#define GVE_MTD_PATH_STATE_DEFAULT 0 +#define GVE_MTD_PATH_STATE_TIMEOUT 1 +#define GVE_MTD_PATH_STATE_CONGESTION 2 +#define GVE_MTD_PATH_STATE_RETRANSMIT 3 + +#define GVE_MTD_PATH_HASH_NONE (0x0 << 4) +#define GVE_MTD_PATH_HASH_L4 (0x1 << 4) + /* GVE Receive Packet Descriptor */ /* The start of an ethernet packet comes 2 bytes into the rx buffer. * gVNIC adds this padding so that both the DMA and the L3/4 protocol header diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index 836042364124..1eb4d5fd8561 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -18,6 +18,7 @@ #define GVE_TX_IRQ_RATELIMIT_US_DQO 50 #define GVE_RX_IRQ_RATELIMIT_US_DQO 20 +#define GVE_MAX_ITR_INTERVAL_DQO (GVE_ITR_INTERVAL_DQO_MASK * 2) /* Timeout in seconds to wait for a reinjection completion after receiving * its corresponding miss completion. @@ -54,17 +55,17 @@ gve_tx_put_doorbell_dqo(const struct gve_priv *priv, } /* Builds register value to write to DQO IRQ doorbell to enable with specified - * ratelimit. + * ITR interval. */ -static inline u32 gve_set_itr_ratelimit_dqo(u32 ratelimit_us) +static inline u32 gve_setup_itr_interval_dqo(u32 interval_us) { u32 result = GVE_ITR_ENABLE_BIT_DQO; /* Interval has 2us granularity. */ - ratelimit_us >>= 1; + interval_us >>= 1; - ratelimit_us &= GVE_ITR_INTERVAL_DQO_MASK; - result |= (ratelimit_us << GVE_ITR_INTERVAL_DQO_SHIFT); + interval_us &= GVE_ITR_INTERVAL_DQO_MASK; + result |= (interval_us << GVE_ITR_INTERVAL_DQO_SHIFT); return result; } @@ -73,9 +74,20 @@ static inline void gve_write_irq_doorbell_dqo(const struct gve_priv *priv, const struct gve_notify_block *block, u32 val) { - u32 index = be32_to_cpu(block->irq_db_index); + u32 index = be32_to_cpu(*block->irq_db_index); iowrite32(val, &priv->db_bar2[index]); } +/* Sets interrupt throttling interval and enables interrupt + * by writing to IRQ doorbell. + */ +static inline void +gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv, + struct gve_notify_block *block, + u32 usecs) +{ + gve_write_irq_doorbell_dqo(priv, block, + gve_setup_itr_interval_dqo(usecs)); +} #endif /* _GVE_DQO_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index c8df47a97fa4..7b9a2d9d9624 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -8,6 +8,7 @@ #include <linux/rtnetlink.h> #include "gve.h" #include "gve_adminq.h" +#include "gve_dqo.h" static void gve_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) @@ -42,7 +43,7 @@ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { }; static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { - "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]", + "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]", "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", @@ -50,7 +51,7 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { }; static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { - "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]", + "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]", "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]", "tx_dma_mapping_error[%u]", }; @@ -139,10 +140,11 @@ static void gve_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, - tmp_rx_desc_err_dropped_pkt, tmp_tx_pkts, tmp_tx_bytes; + u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, + tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt, + tmp_tx_pkts, tmp_tx_bytes; u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts, - rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes; + rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped; int stats_idx, base_stats_idx, max_stats_idx; struct stats *report_stats; int *rx_qid_to_stats_idx; @@ -175,14 +177,14 @@ gve_get_ethtool_stats(struct net_device *netdev, struct gve_rx_ring *rx = &priv->rx[ring]; start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; rx_bytes += tmp_rx_bytes; @@ -191,18 +193,19 @@ gve_get_ethtool_stats(struct net_device *netdev, rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; } } - for (tx_pkts = 0, tx_bytes = 0, ring = 0; + for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; ring < priv->tx_cfg.num_queues; ring++) { if (priv->tx) { do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); tmp_tx_pkts = priv->tx[ring].pkt_done; tmp_tx_bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); tx_pkts += tmp_tx_pkts; tx_bytes += tmp_tx_bytes; + tx_dropped += priv->tx[ring].dropped_pkt; } } @@ -214,9 +217,7 @@ gve_get_ethtool_stats(struct net_device *netdev, /* total rx dropped packets */ data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail + rx_desc_err_dropped_pkt; - /* Skip tx_dropped */ - i++; - + data[i++] = tx_dropped; data[i++] = priv->tx_timeo_cnt; data[i++] = rx_skb_alloc_fail; data[i++] = rx_buf_alloc_fail; @@ -255,15 +256,16 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->fill_cnt; data[i++] = rx->cnt; + data[i++] = rx->fill_cnt - rx->cnt; do { start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; data[i++] = rx->rx_cont_packet_cnt; @@ -318,18 +320,20 @@ gve_get_ethtool_stats(struct net_device *netdev, if (gve_is_gqi(priv)) { data[i++] = tx->req; data[i++] = tx->done; + data[i++] = tx->req - tx->done; } else { /* DQO doesn't currently support * posted/completed descriptor counts; */ data[i++] = 0; data[i++] = 0; + data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head; } do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); tmp_tx_bytes = tx->bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; @@ -419,7 +423,9 @@ static int gve_set_channels(struct net_device *netdev, } static void gve_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *cmd) + struct ethtool_ringparam *cmd, + struct kernel_ethtool_ringparam *kernel_cmd, + struct netlink_ext_ack *extack) { struct gve_priv *priv = netdev_priv(netdev); @@ -535,7 +541,65 @@ static int gve_get_link_ksettings(struct net_device *netdev, return err; } +static int gve_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_ec, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (gve_is_gqi(priv)) + return -EOPNOTSUPP; + ec->tx_coalesce_usecs = priv->tx_coalesce_usecs; + ec->rx_coalesce_usecs = priv->rx_coalesce_usecs; + + return 0; +} + +static int gve_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_ec, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + u32 tx_usecs_orig = priv->tx_coalesce_usecs; + u32 rx_usecs_orig = priv->rx_coalesce_usecs; + int idx; + + if (gve_is_gqi(priv)) + return -EOPNOTSUPP; + + if (ec->tx_coalesce_usecs > GVE_MAX_ITR_INTERVAL_DQO || + ec->rx_coalesce_usecs > GVE_MAX_ITR_INTERVAL_DQO) + return -EINVAL; + priv->tx_coalesce_usecs = ec->tx_coalesce_usecs; + priv->rx_coalesce_usecs = ec->rx_coalesce_usecs; + + if (tx_usecs_orig != priv->tx_coalesce_usecs) { + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + gve_set_itr_coalesce_usecs_dqo(priv, block, + priv->tx_coalesce_usecs); + } + } + + if (rx_usecs_orig != priv->rx_coalesce_usecs) { + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + gve_set_itr_coalesce_usecs_dqo(priv, block, + priv->rx_coalesce_usecs); + } + } + + return 0; +} + const struct ethtool_ops gve_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, .get_sset_count = gve_get_sset_count, @@ -545,6 +609,8 @@ const struct ethtool_ops gve_ethtool_ops = { .set_channels = gve_set_channels, .get_channels = gve_get_channels, .get_link = ethtool_op_get_link, + .get_coalesce = gve_get_coalesce, + .set_coalesce = gve_set_coalesce, .get_ringparam = gve_get_ringparam, .reset = gve_user_reset, .get_tunable = gve_get_tunable, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 59b66f679e46..d3e3ac242bfc 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -51,10 +51,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin(&priv->rx[ring].statss); + u64_stats_fetch_begin_irq(&priv->rx[ring].statss); packets = priv->rx[ring].rpackets; bytes = priv->rx[ring].rbytes; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, start)); s->rx_packets += packets; s->rx_bytes += bytes; @@ -64,10 +64,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin(&priv->tx[ring].statss); + u64_stats_fetch_begin_irq(&priv->tx[ring].statss); packets = priv->tx[ring].pkt_done; bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, start)); s->tx_packets += packets; s->tx_bytes += bytes; @@ -334,15 +334,23 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); goto abort_with_msix_enabled; } - priv->ntfy_blocks = + priv->irq_db_indices = dma_alloc_coherent(&priv->pdev->dev, priv->num_ntfy_blks * - sizeof(*priv->ntfy_blocks), - &priv->ntfy_block_bus, GFP_KERNEL); - if (!priv->ntfy_blocks) { + sizeof(*priv->irq_db_indices), + &priv->irq_db_indices_bus, GFP_KERNEL); + if (!priv->irq_db_indices) { err = -ENOMEM; goto abort_with_mgmt_vector; } + + priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * + sizeof(*priv->ntfy_blocks), GFP_KERNEL); + if (!priv->ntfy_blocks) { + err = -ENOMEM; + goto abort_with_irq_db_indices; + } + /* Setup the other blocks - the first n-1 vectors */ for (i = 0; i < priv->num_ntfy_blks; i++) { struct gve_notify_block *block = &priv->ntfy_blocks[i]; @@ -361,6 +369,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) } irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, get_cpu_mask(i % active_cpus)); + block->irq_db_index = &priv->irq_db_indices[i].index; } return 0; abort_with_some_ntfy_blocks: @@ -372,10 +381,13 @@ abort_with_some_ntfy_blocks: NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); } - dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * - sizeof(*priv->ntfy_blocks), - priv->ntfy_blocks, priv->ntfy_block_bus); + kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; +abort_with_irq_db_indices: + dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * + sizeof(*priv->irq_db_indices), + priv->irq_db_indices, priv->irq_db_indices_bus); + priv->irq_db_indices = NULL; abort_with_mgmt_vector: free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); abort_with_msix_enabled: @@ -403,10 +415,12 @@ static void gve_free_notify_blocks(struct gve_priv *priv) free_irq(priv->msix_vectors[msix_idx].vector, block); } free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); - dma_free_coherent(&priv->pdev->dev, - priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), - priv->ntfy_blocks, priv->ntfy_block_bus); + kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; + dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * + sizeof(*priv->irq_db_indices), + priv->irq_db_indices, priv->irq_db_indices_bus); + priv->irq_db_indices = NULL; pci_disable_msix(priv->pdev); kvfree(priv->msix_vectors); priv->msix_vectors = NULL; @@ -428,7 +442,7 @@ static int gve_setup_device_resources(struct gve_priv *priv) err = gve_adminq_configure_device_resources(priv, priv->counter_array_bus, priv->num_event_counters, - priv->ntfy_block_bus, + priv->irq_db_indices_bus, priv->num_ntfy_blks); if (unlikely(err)) { dev_err(&priv->pdev->dev, @@ -512,8 +526,7 @@ static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll, - NAPI_POLL_WEIGHT); + netif_napi_add(priv->dev, &block->napi, gve_poll); } static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) @@ -752,9 +765,9 @@ static void gve_free_rings(struct gve_priv *priv) int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction dir) + enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(GFP_KERNEL); + *page = alloc_page(gfp_flags); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -797,7 +810,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], - gve_qpl_dma_dir(priv, id)); + gve_qpl_dma_dir(priv, id), GFP_KERNEL); /* caller handles clean up */ if (err) return -ENOMEM; @@ -817,8 +830,7 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, - int id) +static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) { struct gve_queue_page_list *qpl = &priv->qpls[id]; int i; @@ -844,8 +856,7 @@ static int gve_alloc_qpls(struct gve_priv *priv) int i, j; int err; - /* Raw addressing means no QPLs */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) + if (num_qpls == 0) return 0; priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL); @@ -888,8 +899,7 @@ static void gve_free_qpls(struct gve_priv *priv) int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); int i; - /* Raw addressing means no QPLs */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) + if (num_qpls == 0) return; kvfree(priv->qpl_cfg.qpl_id_map); @@ -1100,9 +1110,8 @@ static void gve_turnup(struct gve_priv *priv) if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { - u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO); - - gve_write_irq_doorbell_dqo(priv, block, val); + gve_set_itr_coalesce_usecs_dqo(priv, block, + priv->tx_coalesce_usecs); } } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { @@ -1113,9 +1122,8 @@ static void gve_turnup(struct gve_priv *priv) if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { - u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO); - - gve_write_irq_doorbell_dqo(priv, block, val); + gve_set_itr_coalesce_usecs_dqo(priv, block, + priv->rx_coalesce_usecs); } } @@ -1265,9 +1273,9 @@ void gve_handle_report_stats(struct gve_priv *priv) } do { - start = u64_stats_fetch_begin(&priv->tx[idx].statss); + start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); tx_bytes = priv->tx[idx].bytes_done; - } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); + } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(TX_WAKE_CNT), .value = cpu_to_be64(priv->tx[idx].wake_queue), @@ -1412,6 +1420,11 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); + if (!gve_is_gqi(priv)) { + priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; + priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; + } + setup_device: err = gve_setup_device_resources(priv); if (!err) @@ -1663,6 +1676,58 @@ static void gve_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static void gve_shutdown(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct gve_priv *priv = netdev_priv(netdev); + bool was_up = netif_carrier_ok(priv->dev); + + rtnl_lock(); + if (was_up && gve_close(priv->dev)) { + /* If the dev was up, attempt to close, if close fails, reset */ + gve_reset_and_teardown(priv, was_up); + } else { + /* If the dev wasn't up or close worked, finish tearing down */ + gve_teardown_priv_resources(priv); + } + rtnl_unlock(); +} + +#ifdef CONFIG_PM +static int gve_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct gve_priv *priv = netdev_priv(netdev); + bool was_up = netif_carrier_ok(priv->dev); + + priv->suspend_cnt++; + rtnl_lock(); + if (was_up && gve_close(priv->dev)) { + /* If the dev was up, attempt to close, if close fails, reset */ + gve_reset_and_teardown(priv, was_up); + } else { + /* If the dev wasn't up or close worked, finish tearing down */ + gve_teardown_priv_resources(priv); + } + priv->up_before_suspend = was_up; + rtnl_unlock(); + return 0; +} + +static int gve_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct gve_priv *priv = netdev_priv(netdev); + int err; + + priv->resume_cnt++; + rtnl_lock(); + err = gve_reset_recovery(priv, priv->up_before_suspend); + rtnl_unlock(); + return err; +} +#endif /* CONFIG_PM */ + static const struct pci_device_id gve_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, { } @@ -1673,6 +1738,11 @@ static struct pci_driver gvnic_driver = { .id_table = gve_id_table, .probe = gve_probe, .remove = gve_remove, + .shutdown = gve_shutdown, +#ifdef CONFIG_PM + .suspend = gve_suspend, + .resume = gve_resume, +#endif }; module_pci_driver(gvnic_driver); diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 3d04b5aff331..021bbf308d68 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -86,7 +86,8 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, dma_addr_t dma; int err; - err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE); + err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE, + GFP_ATOMIC); if (err) return err; @@ -438,7 +439,7 @@ static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx) if (frag_size > rx->packet_buffer_size) { packet_size_error = true; netdev_warn(priv->dev, - "RX fragment error: packet_buffer_size=%d, frag_size=%d, droping packet.", + "RX fragment error: packet_buffer_size=%d, frag_size=%d, dropping packet.", rx->packet_buffer_size, be16_to_cpu(desc->len)); } page_info = &rx->data.page_info[idx]; @@ -608,6 +609,7 @@ static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0); *work_done = work_cnt; + skb_record_rx_queue(skb, rx->q_num); if (skb_is_nonlinear(skb)) napi_gro_frags(napi); else @@ -639,8 +641,6 @@ bool gve_rx_work_pending(struct gve_rx_ring *rx) desc = rx->desc.desc_ring + next_idx; flags_seq = desc->flags_seq; - /* Make sure we have synchronized the seq no with the device */ - smp_rmb(); return (GVE_SEQNO(flags_seq) == rx->desc.seqno); } diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index beb8bb079023..2e6461b0ea8b 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv, int err; err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page, - &buf_state->addr, DMA_FROM_DEVICE); + &buf_state->addr, DMA_FROM_DEVICE, GFP_ATOMIC); if (err) return err; diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index a9cb241fedf4..4888bf05fbed 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -296,11 +296,14 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx, return bytes; } -/* The most descriptors we could need is MAX_SKB_FRAGS + 3 : 1 for each skb frag, - * +1 for the skb linear portion, +1 for when tcp hdr needs to be in separate descriptor, - * and +1 if the payload wraps to the beginning of the FIFO. +/* The most descriptors we could need is MAX_SKB_FRAGS + 4 : + * 1 for each skb frag + * 1 for the skb linear portion + * 1 for when tcp hdr needs to be in separate descriptor + * 1 if the payload wraps to the beginning of the FIFO + * 1 for metadata descriptor */ -#define MAX_TX_DESC_NEEDED (MAX_SKB_FRAGS + 3) +#define MAX_TX_DESC_NEEDED (MAX_SKB_FRAGS + 4) static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info) { if (info->skb) { @@ -395,6 +398,19 @@ static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, pkt_desc->pkt.seg_addr = cpu_to_be64(addr); } +static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc, + struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(mtd_desc->mtd) != sizeof(mtd_desc->pkt)); + + mtd_desc->mtd.type_flags = GVE_TXD_MTD | GVE_MTD_SUBTYPE_PATH; + mtd_desc->mtd.path_state = GVE_MTD_PATH_STATE_DEFAULT | + GVE_MTD_PATH_HASH_L4; + mtd_desc->mtd.path_hash = cpu_to_be32(skb->hash); + mtd_desc->mtd.reserved0 = 0; + mtd_desc->mtd.reserved1 = 0; +} + static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, struct sk_buff *skb, bool is_gso, u16 len, u64 addr) @@ -426,6 +442,7 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset; union gve_tx_desc *pkt_desc, *seg_desc; struct gve_tx_buffer_state *info; + int mtd_desc_nr = !!skb->l4_hash; bool is_gso = skb_is_gso(skb); u32 idx = tx->req & tx->mask; int payload_iov = 2; @@ -457,7 +474,7 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st &info->iov[payload_iov]); gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, - 1 + payload_nfrags, hlen, + 1 + mtd_desc_nr + payload_nfrags, hlen, info->iov[hdr_nfrags - 1].iov_offset); skb_copy_bits(skb, 0, @@ -468,8 +485,13 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st info->iov[hdr_nfrags - 1].iov_len); copy_offset = hlen; + if (mtd_desc_nr) { + next_idx = (tx->req + 1) & tx->mask; + gve_tx_fill_mtd_desc(&tx->desc[next_idx], skb); + } + for (i = payload_iov; i < payload_nfrags + payload_iov; i++) { - next_idx = (tx->req + 1 + i - payload_iov) & tx->mask; + next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask; seg_desc = &tx->desc[next_idx]; gve_tx_fill_seg_desc(seg_desc, skb, is_gso, @@ -485,16 +507,17 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st copy_offset += info->iov[i].iov_len; } - return 1 + payload_nfrags; + return 1 + mtd_desc_nr + payload_nfrags; } static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); - int hlen, payload_nfrags, l4_hdr_offset; - union gve_tx_desc *pkt_desc, *seg_desc; + int hlen, num_descriptors, l4_hdr_offset; + union gve_tx_desc *pkt_desc, *mtd_desc, *seg_desc; struct gve_tx_buffer_state *info; + int mtd_desc_nr = !!skb->l4_hash; bool is_gso = skb_is_gso(skb); u32 idx = tx->req & tx->mask; u64 addr; @@ -523,23 +546,30 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, dma_unmap_len_set(info, len, len); dma_unmap_addr_set(info, dma, addr); - payload_nfrags = shinfo->nr_frags; + num_descriptors = 1 + shinfo->nr_frags; + if (hlen < len) + num_descriptors++; + if (mtd_desc_nr) + num_descriptors++; + + gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + num_descriptors, hlen, addr); + + if (mtd_desc_nr) { + idx = (idx + 1) & tx->mask; + mtd_desc = &tx->desc[idx]; + gve_tx_fill_mtd_desc(mtd_desc, skb); + } + if (hlen < len) { /* For gso the rest of the linear portion of the skb needs to * be in its own descriptor. */ - payload_nfrags++; - gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, - 1 + payload_nfrags, hlen, addr); - len -= hlen; addr += hlen; - idx = (tx->req + 1) & tx->mask; + idx = (idx + 1) & tx->mask; seg_desc = &tx->desc[idx]; gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr); - } else { - gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, - 1 + payload_nfrags, hlen, addr); } for (i = 0; i < shinfo->nr_frags; i++) { @@ -560,11 +590,14 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr); } - return 1 + payload_nfrags; + return num_descriptors; unmap_drop: - i += (payload_nfrags == shinfo->nr_frags ? 1 : 2); + i += num_descriptors - shinfo->nr_frags; while (i--) { + /* Skip metadata descriptor, if set */ + if (i == 1 && mtd_desc_nr == 1) + continue; idx--; gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]); } diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index ec394d991668..588d64819ed5 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -386,7 +386,7 @@ static int gve_prep_tso(struct sk_buff *skb) (__force __wsum)htonl(paylen)); /* Compute length of segmentation header. */ - header_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + header_len = skb_tcp_all_headers(skb); break; default: return -EINVAL; @@ -598,9 +598,9 @@ static int gve_num_buffer_descs_needed(const struct sk_buff *skb) */ static bool gve_can_send_tso(const struct sk_buff *skb) { - const int header_len = skb_checksum_start_offset(skb) + tcp_hdrlen(skb); const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1; const struct skb_shared_info *shinfo = skb_shinfo(skb); + const int header_len = skb_tcp_all_headers(skb); const int gso_size = shinfo->gso_size; int cur_seg_num_bufs; int cur_seg_size; @@ -795,7 +795,7 @@ static void gve_handle_packet_completion(struct gve_priv *priv, GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) { /* No outstanding miss completion but packet allocated * implies packet receives a re-injection completion - * without a a prior miss completion. Return without + * without a prior miss completion. Return without * completing the packet. */ net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n", |