diff options
105 files changed, 1397 insertions, 556 deletions
diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst index a2babd0d7954..595d7ef5fc8b 100644 --- a/Documentation/networking/net_cachelines/inet_sock.rst +++ b/Documentation/networking/net_cachelines/inet_sock.rst @@ -1,9 +1,9 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC -===================================================== -inet_connection_sock struct fast path usage breakdown -===================================================== +========================================== +inet_sock struct fast path usage breakdown +========================================== Type Name fastpath_tx_access fastpath_rx_access comment ..struct ..inet_sock diff --git a/MAINTAINERS b/MAINTAINERS index 2ecaaec6a6bf..5144282a2578 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14111,6 +14111,17 @@ F: mm/ F: tools/mm/ F: tools/testing/selftests/mm/ +MEMORY MAPPING +M: Andrew Morton <akpm@linux-foundation.org> +R: Liam R. Howlett <Liam.Howlett@oracle.com> +R: Vlastimil Babka <vbabka@suse.cz> +R: Lorenzo Stoakes <lstoakes@gmail.com> +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: mm/mmap.c + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal <miquel.raynal@bootlin.com> M: Richard Weinberger <richard@nod.at> @@ -14369,7 +14380,7 @@ MICROCHIP MCP16502 PMIC DRIVER M: Claudiu Beznea <claudiu.beznea@tuxon.dev> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported -F: Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt +F: Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml F: drivers/regulator/mcp16502.c MICROCHIP MCP3564 ADC DRIVER @@ -17984,33 +17995,34 @@ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER M: Kalle Valo <kvalo@kernel.org> -M: Jeff Johnson <quic_jjohnson@quicinc.com> +M: Jeff Johnson <jjohnson@kernel.org> L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath12k/ +N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER M: Kalle Valo <kvalo@kernel.org> -M: Jeff Johnson <quic_jjohnson@quicinc.com> +M: Jeff Johnson <jjohnson@kernel.org> L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git -F: Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml F: drivers/net/wireless/ath/ath10k/ +N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER M: Kalle Valo <kvalo@kernel.org> -M: Jeff Johnson <quic_jjohnson@quicinc.com> +M: Jeff Johnson <jjohnson@kernel.org> L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git -F: Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml F: drivers/net/wireless/ath/ath11k/ +N: ath11k QUALCOMM ATHEROS ATH9K WIRELESS DRIVER M: Toke Høiland-Jørgensen <toke@toke.dk> diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bac4cabef607..467ac2f768ac 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) src += blocks * AES_BLOCK_SIZE; } if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, nbytes, walk.iv); + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); + nbytes = 0; } kernel_neon_end(); diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c index 0b6dd8aa21f2..0f1bd7dcde24 100644 --- a/crypto/lskcipher.c +++ b/crypto/lskcipher.c @@ -212,13 +212,12 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req, ivsize = crypto_lskcipher_ivsize(tfm); ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(skcipher) + 1); + memcpy(ivs, req->iv, ivsize); flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; if (req->base.flags & CRYPTO_SKCIPHER_REQ_CONT) flags |= CRYPTO_LSKCIPHER_FLAG_CONT; - else - memcpy(ivs, req->iv, ivsize); if (!(req->base.flags & CRYPTO_SKCIPHER_REQ_NOTFINAL)) flags |= CRYPTO_LSKCIPHER_FLAG_FINAL; @@ -234,8 +233,7 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req, flags |= CRYPTO_LSKCIPHER_FLAG_CONT; } - if (flags & CRYPTO_LSKCIPHER_FLAG_FINAL) - memcpy(req->iv, ivs, ivsize); + memcpy(req->iv, ivs, ivsize); return err; } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index dbdee2924594..02255795b800 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -525,10 +525,12 @@ static void acpi_ec_clear(struct acpi_ec *ec) static void acpi_ec_enable_event(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (acpi_ec_started(ec)) __acpi_ec_enable_event(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); /* Drain additional events if hardware requires that */ if (EC_FLAGS_CLEAR_ON_RESUME) @@ -544,9 +546,11 @@ static void __acpi_ec_flush_work(void) static void acpi_ec_disable_event(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); __acpi_ec_disable_event(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); /* * When ec_freeze_events is true, we need to flush events in @@ -567,9 +571,10 @@ void acpi_ec_flush_work(void) static bool acpi_ec_guard_event(struct acpi_ec *ec) { + unsigned long flags; bool guarded; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); /* * If firmware SCI_EVT clearing timing is "event", we actually * don't know when the SCI_EVT will be cleared by firmware after @@ -585,29 +590,31 @@ static bool acpi_ec_guard_event(struct acpi_ec *ec) guarded = ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec->event_state != EC_EVENT_READY && (!ec->curr || ec->curr->command != ACPI_EC_COMMAND_QUERY); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return guarded; } static int ec_transaction_polled(struct acpi_ec *ec) { + unsigned long flags; int ret = 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_POLL)) ret = 1; - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return ret; } static int ec_transaction_completed(struct acpi_ec *ec) { + unsigned long flags; int ret = 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE)) ret = 1; - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return ret; } @@ -749,6 +756,7 @@ static int ec_guard(struct acpi_ec *ec) static int ec_poll(struct acpi_ec *ec) { + unsigned long flags; int repeat = 5; /* number of command restarts */ while (repeat--) { @@ -757,14 +765,14 @@ static int ec_poll(struct acpi_ec *ec) do { if (!ec_guard(ec)) return 0; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); advance_transaction(ec, false); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } while (time_before(jiffies, delay)); pr_debug("controller reset, restart transaction\n"); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); start_transaction(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } return -ETIME; } @@ -772,10 +780,11 @@ static int ec_poll(struct acpi_ec *ec) static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, struct transaction *t) { + unsigned long tmp; int ret = 0; /* start transaction */ - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, tmp); /* Enable GPE for command processing (IBF=0/OBF=1) */ if (!acpi_ec_submit_flushable_request(ec)) { ret = -EINVAL; @@ -786,11 +795,11 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, ec->curr = t; ec_dbg_req("Command(%s) started", acpi_ec_cmd_string(t->command)); start_transaction(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, tmp); ret = ec_poll(ec); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, tmp); if (t->irq_count == ec_storm_threshold) acpi_ec_unmask_events(ec); ec_dbg_req("Command(%s) stopped", acpi_ec_cmd_string(t->command)); @@ -799,7 +808,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, acpi_ec_complete_request(ec); ec_dbg_ref(ec, "Decrease command"); unlock: - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, tmp); return ret; } @@ -927,7 +936,9 @@ EXPORT_SYMBOL(ec_get_handle); static void acpi_ec_start(struct acpi_ec *ec, bool resuming) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (!test_and_set_bit(EC_FLAGS_STARTED, &ec->flags)) { ec_dbg_drv("Starting EC"); /* Enable GPE for event processing (SCI_EVT=1) */ @@ -937,28 +948,31 @@ static void acpi_ec_start(struct acpi_ec *ec, bool resuming) } ec_log_drv("EC started"); } - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static bool acpi_ec_stopped(struct acpi_ec *ec) { + unsigned long flags; bool flushed; - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); flushed = acpi_ec_flushed(ec); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); return flushed; } static void acpi_ec_stop(struct acpi_ec *ec, bool suspending) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); if (acpi_ec_started(ec)) { ec_dbg_drv("Stopping EC"); set_bit(EC_FLAGS_STOPPED, &ec->flags); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); wait_event(ec->wait, acpi_ec_stopped(ec)); - spin_lock(&ec->lock); + spin_lock_irqsave(&ec->lock, flags); /* Disable GPE for event processing (SCI_EVT=1) */ if (!suspending) { acpi_ec_complete_request(ec); @@ -969,25 +983,29 @@ static void acpi_ec_stop(struct acpi_ec *ec, bool suspending) clear_bit(EC_FLAGS_STOPPED, &ec->flags); ec_log_drv("EC stopped"); } - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_enter_noirq(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); ec->busy_polling = true; ec->polling_guard = 0; ec_log_drv("interrupt blocked"); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_leave_noirq(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); ec->busy_polling = ec_busy_polling; ec->polling_guard = ec_polling_guard; ec_log_drv("interrupt unblocked"); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } void acpi_ec_block_transactions(void) @@ -1119,9 +1137,9 @@ static void acpi_ec_event_processor(struct work_struct *work) ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->queries_in_progress--; - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); acpi_ec_put_query_handler(handler); kfree(q); @@ -1184,12 +1202,12 @@ static int acpi_ec_submit_query(struct acpi_ec *ec) */ ec_dbg_evt("Query(0x%02x) scheduled", value); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->queries_in_progress++; queue_work(ec_query_wq, &q->work); - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); return 0; @@ -1205,14 +1223,14 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event started"); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); while (ec->events_to_process) { - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); acpi_ec_submit_query(ec); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); ec->events_to_process--; } @@ -1229,11 +1247,11 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event stopped"); - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); guard_timeout = !!ec_guard(ec); - spin_lock(&ec->lock); + spin_lock_irq(&ec->lock); /* Take care of SCI_EVT unless someone else is doing that. */ if (guard_timeout && !ec->curr) @@ -1246,7 +1264,7 @@ static void acpi_ec_event_handler(struct work_struct *work) ec->events_in_progress--; - spin_unlock(&ec->lock); + spin_unlock_irq(&ec->lock); } static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt) @@ -1271,11 +1289,13 @@ static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt static void acpi_ec_handle_interrupt(struct acpi_ec *ec) { - spin_lock(&ec->lock); + unsigned long flags; + + spin_lock_irqsave(&ec->lock, flags); clear_gpe_and_advance_transaction(ec, true); - spin_unlock(&ec->lock); + spin_unlock_irqrestore(&ec->lock, flags); } static u32 acpi_ec_gpe_handler(acpi_handle gpe_device, @@ -2085,7 +2105,7 @@ bool acpi_ec_dispatch_gpe(void) * Dispatch the EC GPE in-band, but do not report wakeup in any case * to allow the caller to process events properly after that. */ - spin_lock(&first_ec->lock); + spin_lock_irq(&first_ec->lock); if (acpi_ec_gpe_status_set(first_ec)) { pm_pr_dbg("ACPI EC GPE status set\n"); @@ -2094,7 +2114,7 @@ bool acpi_ec_dispatch_gpe(void) work_in_progress = acpi_ec_work_in_progress(first_ec); } - spin_unlock(&first_ec->lock); + spin_unlock_irq(&first_ec->lock); if (!work_in_progress) return false; @@ -2107,11 +2127,11 @@ bool acpi_ec_dispatch_gpe(void) pm_pr_dbg("ACPI EC work flushed\n"); - spin_lock(&first_ec->lock); + spin_lock_irq(&first_ec->lock); work_in_progress = acpi_ec_work_in_progress(first_ec); - spin_unlock(&first_ec->lock); + spin_unlock_irq(&first_ec->lock); } while (work_in_progress && !pm_wakeup_pending()); return false; diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index fdb0fae88d1c..b40b32fa7f1c 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index a61757835695..9a7243d5db71 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377) bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr)) - set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks); + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks); kfree_skb(skb); return 0; @@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) hdev->set_bdaddr = bcm4377_hci_set_bdaddr; hdev->setup = bcm4377_hci_setup; - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); if (bcm4377->hw->broken_mws_transport_config) set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks); if (bcm4377->hw->broken_ext_scan) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 94b8c406f0c0..edd2a81b4d5e 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -1806,13 +1807,12 @@ static int qca_power_on(struct hci_dev *hdev) static void hci_coredump_qca(struct hci_dev *hdev) { + int err; static const u8 param[] = { 0x26 }; - struct sk_buff *skb; - skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) - bt_dev_err(hdev, "%s: trigger crash failed (%ld)", __func__, PTR_ERR(skb)); - kfree_skb(skb); + err = __hci_cmd_send(hdev, 0xfc0c, 1, param); + if (err < 0) + bt_dev_err(hdev, "%s: trigger crash failed (%d)", __func__, err); } static int qca_get_data_path_id(struct hci_dev *hdev, __u8 *data_path_id) @@ -1904,7 +1904,17 @@ retry: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ca94e60e705a..79619227ea51 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2987,6 +2987,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 5152bd1b0daf..241db366b2c7 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,6 +42,11 @@ struct dpll_pin_registration { void *priv; }; +struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->dpll_pin); +} + struct dpll_device *dpll_device_get_by_id(int id) { if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) @@ -564,7 +569,7 @@ void dpll_pin_put(struct dpll_pin *pin) xa_destroy(&pin->parent_refs); xa_erase(&dpll_pin_xa, pin->id); dpll_pin_prop_free(&pin->prop); - kfree(pin); + kfree_rcu(pin, rcu); } mutex_unlock(&dpll_lock); } diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h index 717f715015c7..2b6d8ef1cdf3 100644 --- a/drivers/dpll/dpll_core.h +++ b/drivers/dpll/dpll_core.h @@ -47,6 +47,7 @@ struct dpll_device { * @prop: pin properties copied from the registerer * @rclk_dev_name: holds name of device when pin can recover clock from it * @refcount: refcount + * @rcu: rcu_head for kfree_rcu() **/ struct dpll_pin { u32 id; @@ -57,6 +58,7 @@ struct dpll_pin { struct xarray parent_refs; struct dpll_pin_properties prop; refcount_t refcount; + struct rcu_head rcu; }; /** diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index e451b28840d5..5887feb347a4 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -621,6 +621,7 @@ static void mtd_check_of_node(struct mtd_info *mtd) if (plen == mtd_name_len && !strncmp(mtd->name, pname + offset, plen)) { mtd_set_of_node(mtd, mtd_dn); + of_node_put(mtd_dn); break; } } diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index a46698744850..5b0f5a9cef81 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -290,16 +290,13 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = { MARVELL_LAYOUT( 2048, 512, 4, 1, 1, 2048, 32, 30, 0, 0, 0), MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,32, 30), MARVELL_LAYOUT( 2048, 512, 8, 2, 1, 1024, 0, 30,1024,64, 30), - MARVELL_LAYOUT( 2048, 512, 12, 3, 2, 704, 0, 30,640, 0, 30), - MARVELL_LAYOUT( 2048, 512, 16, 5, 4, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 2048, 512, 16, 4, 4, 512, 0, 30, 0, 32, 30), MARVELL_LAYOUT( 4096, 512, 4, 2, 2, 2048, 32, 30, 0, 0, 0), - MARVELL_LAYOUT( 4096, 512, 8, 5, 4, 1024, 0, 30, 0, 64, 30), - MARVELL_LAYOUT( 4096, 512, 12, 6, 5, 704, 0, 30,576, 32, 30), - MARVELL_LAYOUT( 4096, 512, 16, 9, 8, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 4096, 512, 8, 4, 4, 1024, 0, 30, 0, 64, 30), + MARVELL_LAYOUT( 4096, 512, 16, 8, 8, 512, 0, 30, 0, 32, 30), MARVELL_LAYOUT( 8192, 512, 4, 4, 4, 2048, 0, 30, 0, 0, 0), - MARVELL_LAYOUT( 8192, 512, 8, 9, 8, 1024, 0, 30, 0, 160, 30), - MARVELL_LAYOUT( 8192, 512, 12, 12, 11, 704, 0, 30,448, 64, 30), - MARVELL_LAYOUT( 8192, 512, 16, 17, 16, 512, 0, 30, 0, 32, 30), + MARVELL_LAYOUT( 8192, 512, 8, 8, 8, 1024, 0, 30, 0, 160, 30), + MARVELL_LAYOUT( 8192, 512, 16, 16, 16, 512, 0, 30, 0, 32, 30), }; /** diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 987710e09441..6023cba748bb 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, * report the maximum of 4 in this case */ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status & STATUS_ECC_MASK) >> 2) | ((status2 & STATUS_ECC_MASK) >> 4); @@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, * 1 ... 4 bits are flipped (and corrected) */ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status2 & STATUS_ECC_MASK) >> 4) + 1; case STATUS_ECC_UNCOR_ERROR: diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig index c91b4dcef4ec..760a9a60bc15 100644 --- a/drivers/net/ethernet/adi/Kconfig +++ b/drivers/net/ethernet/adi/Kconfig @@ -7,7 +7,6 @@ config NET_VENDOR_ADI bool "Analog Devices devices" default y depends on SPI - select PHYLIB help If you have a network (Ethernet) card belonging to this class, say Y. @@ -22,6 +21,7 @@ config ADIN1110 tristate "Analog Devices ADIN1110 MAC-PHY" depends on SPI && NET_SWITCHDEV select CRC8 + select PHYLIB help Say yes here to build support for Analog Devices ADIN1110 Low Power 10BASE-T1L Ethernet MAC-PHY. diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 9ba15d3183d7..758535adc9ff 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1073,6 +1073,14 @@ int memac_initialization(struct mac_device *mac_dev, unsigned long capabilities; unsigned long *supported; + /* The internal connection to the serdes is XGMII, but this isn't + * really correct for the phy mode (which is the external connection). + * However, this is how all older device trees say that they want + * 10GBASE-R (aka XFI), so just convert it for them. + */ + if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) + mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER; + mac_dev->phylink_ops = &memac_mac_ops; mac_dev->set_promisc = memac_set_promiscuous; mac_dev->change_addr = memac_modify_mac_address; @@ -1139,7 +1147,7 @@ int memac_initialization(struct mac_device *mac_dev, * (and therefore that xfi_pcs cannot be set). If we are defaulting to * XGMII, assume this is for XFI. Otherwise, assume it is for SGMII. */ - if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) + if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_10GBASER) memac->xfi_pcs = pcs; else memac->sgmii_pcs = pcs; @@ -1153,14 +1161,6 @@ int memac_initialization(struct mac_device *mac_dev, goto _return_fm_mac_free; } - /* The internal connection to the serdes is XGMII, but this isn't - * really correct for the phy mode (which is the external connection). - * However, this is how all older device trees say that they want - * 10GBASE-R (aka XFI), so just convert it for them. - */ - if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) - mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER; - /* TODO: The following interface modes are supported by (some) hardware * but not by this driver: * - 1000BASE-KX diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 7ac847718882..c979192e44d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -190,15 +190,13 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) q_vector = vsi->q_vectors[v_idx]; ice_for_each_tx_ring(tx_ring, q_vector->tx) { - if (vsi->netdev) - netif_queue_set_napi(vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, NULL); + ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX, + NULL); tx_ring->q_vector = NULL; } ice_for_each_rx_ring(rx_ring, q_vector->rx) { - if (vsi->netdev) - netif_queue_set_napi(vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, NULL); + ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX, + NULL); rx_ring->q_vector = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index b9c5eced6326..adfa1f2a80a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -31,6 +31,26 @@ static const char * const pin_type_name[] = { }; /** + * ice_dpll_is_reset - check if reset is in progress + * @pf: private board structure + * @extack: error reporting + * + * If reset is in progress, fill extack with error. + * + * Return: + * * false - no reset in progress + * * true - reset in progress + */ +static bool ice_dpll_is_reset(struct ice_pf *pf, struct netlink_ext_ack *extack) +{ + if (ice_is_reset_in_progress(pf->state)) { + NL_SET_ERR_MSG(extack, "PF reset in progress"); + return true; + } + return false; +} + +/** * ice_dpll_pin_freq_set - set pin's frequency * @pf: private board structure * @pin: pointer to a pin @@ -109,6 +129,9 @@ ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack); mutex_unlock(&pf->dplls.lock); @@ -254,6 +277,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, * ice_dpll_pin_enable - enable a pin on dplls * @hw: board private hw structure * @pin: pointer to a pin + * @dpll_idx: dpll index to connect to output pin * @pin_type: type of pin being enabled * @extack: error reporting * @@ -266,7 +290,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, */ static int ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, - enum ice_dpll_pin_type pin_type, + u8 dpll_idx, enum ice_dpll_pin_type pin_type, struct netlink_ext_ack *extack) { u8 flags = 0; @@ -280,10 +304,12 @@ ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0); break; case ICE_DPLL_PIN_TYPE_OUTPUT: + flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL; if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN; - ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0); + ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, dpll_idx, + 0, 0); break; default: return -EINVAL; @@ -370,7 +396,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, case ICE_DPLL_PIN_TYPE_INPUT: ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL, NULL, &pin->flags[0], - &pin->freq, NULL); + &pin->freq, &pin->phase_adjust); if (ret) goto err; if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) { @@ -398,14 +424,27 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, break; case ICE_DPLL_PIN_TYPE_OUTPUT: ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx, - &pin->flags[0], NULL, + &pin->flags[0], &parent, &pin->freq, NULL); if (ret) goto err; - if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) - pin->state[0] = DPLL_PIN_STATE_CONNECTED; - else - pin->state[0] = DPLL_PIN_STATE_DISCONNECTED; + + parent &= ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL; + if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) { + pin->state[pf->dplls.eec.dpll_idx] = + parent == pf->dplls.eec.dpll_idx ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + parent == pf->dplls.pps.dpll_idx ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_DISCONNECTED; + } else { + pin->state[pf->dplls.eec.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + } break; case ICE_DPLL_PIN_TYPE_RCLK_INPUT: for (parent = 0; parent < pf->dplls.rclk.num_parents; @@ -568,9 +607,13 @@ ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); if (enable) - ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack); + ret = ice_dpll_pin_enable(&pf->hw, p, d->dpll_idx, pin_type, + extack); else ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack); if (!ret) @@ -603,6 +646,11 @@ ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv, struct netlink_ext_ack *extack) { bool enable = state == DPLL_PIN_STATE_CONNECTED; + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + + if (!enable && p->state[d->dpll_idx] == DPLL_PIN_STATE_DISCONNECTED) + return 0; return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable, extack, ICE_DPLL_PIN_TYPE_OUTPUT); @@ -665,14 +713,16 @@ ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_pin_state_update(pf, p, pin_type, extack); if (ret) goto unlock; - if (pin_type == ICE_DPLL_PIN_TYPE_INPUT) + if (pin_type == ICE_DPLL_PIN_TYPE_INPUT || + pin_type == ICE_DPLL_PIN_TYPE_OUTPUT) *state = p->state[d->dpll_idx]; - else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT) - *state = p->state[0]; ret = 0; unlock: mutex_unlock(&pf->dplls.lock); @@ -790,6 +840,9 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack); mutex_unlock(&pf->dplls.lock); @@ -910,6 +963,9 @@ ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, u8 flag, flags_en = 0; int ret; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); switch (type) { case ICE_DPLL_PIN_TYPE_INPUT: @@ -1069,6 +1125,9 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv, int ret = -EINVAL; u32 hw_idx; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); hw_idx = parent->idx - pf->dplls.base_rclk_idx; if (hw_idx >= pf->dplls.num_inputs) @@ -1123,6 +1182,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv, int ret = -EINVAL; u32 hw_idx; + if (ice_dpll_is_reset(pf, extack)) + return -EBUSY; + mutex_lock(&pf->dplls.lock); hw_idx = parent->idx - pf->dplls.base_rclk_idx; if (hw_idx >= pf->dplls.num_inputs) @@ -1305,8 +1367,10 @@ static void ice_dpll_periodic_work(struct kthread_work *work) struct ice_pf *pf = container_of(d, struct ice_pf, dplls); struct ice_dpll *de = &pf->dplls.eec; struct ice_dpll *dp = &pf->dplls.pps; - int ret; + int ret = 0; + if (ice_is_reset_in_progress(pf->state)) + goto resched; mutex_lock(&pf->dplls.lock); ret = ice_dpll_update_state(pf, de, false); if (!ret) @@ -1326,6 +1390,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work) ice_dpll_notify_changes(de); ice_dpll_notify_changes(dp); +resched: /* Run twice a second or reschedule if update failed */ kthread_queue_delayed_work(d->kworker, &d->work, ret ? msecs_to_jiffies(10) : diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 9be724291ef8..097bf8fd6bf0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2426,7 +2426,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) ice_vsi_map_rings_to_vectors(vsi); /* Associate q_vector rings to napi */ - ice_vsi_set_napi_queues(vsi, true); + ice_vsi_set_napi_queues(vsi); vsi->stat_offsets_loaded = false; @@ -2904,19 +2904,19 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) } /** - * ice_queue_set_napi - Set the napi instance for the queue + * __ice_queue_set_napi - Set the napi instance for the queue * @dev: device to which NAPI and queue belong * @queue_index: Index of queue * @type: queue type as RX or TX * @napi: NAPI context * @locked: is the rtnl_lock already held * - * Set the napi instance for the queue + * Set the napi instance for the queue. Caller indicates the lock status. */ static void -ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi, - bool locked) +__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi, + bool locked) { if (!locked) rtnl_lock(); @@ -2926,26 +2926,79 @@ ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, } /** - * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi + * ice_queue_set_napi - Set the napi instance for the queue + * @vsi: VSI being configured + * @queue_index: Index of queue + * @type: queue type as RX or TX + * @napi: NAPI context + * + * Set the napi instance for the queue. The rtnl lock state is derived from the + * execution path. + */ +void +ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi) +{ + struct ice_pf *pf = vsi->back; + + if (!vsi->netdev) + return; + + if (current_work() == &pf->serv_task || + test_bit(ICE_PREPARED_FOR_RESET, pf->state) || + test_bit(ICE_DOWN, pf->state) || + test_bit(ICE_SUSPENDED, pf->state)) + __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, + false); + else + __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, + true); +} + +/** + * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi * @q_vector: q_vector pointer * @locked: is the rtnl_lock already held * + * Associate the q_vector napi with all the queue[s] on the vector. + * Caller indicates the lock status. + */ +void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) +{ + struct ice_rx_ring *rx_ring; + struct ice_tx_ring *tx_ring; + + ice_for_each_rx_ring(rx_ring, q_vector->rx) + __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, + NETDEV_QUEUE_TYPE_RX, &q_vector->napi, + locked); + + ice_for_each_tx_ring(tx_ring, q_vector->tx) + __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, + NETDEV_QUEUE_TYPE_TX, &q_vector->napi, + locked); + /* Also set the interrupt number for the NAPI */ + netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); +} + +/** + * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi + * @q_vector: q_vector pointer + * * Associate the q_vector napi with all the queue[s] on the vector */ -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) +void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector) { struct ice_rx_ring *rx_ring; struct ice_tx_ring *tx_ring; ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi, - locked); + ice_queue_set_napi(q_vector->vsi, rx_ring->q_index, + NETDEV_QUEUE_TYPE_RX, &q_vector->napi); ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi, - locked); + ice_queue_set_napi(q_vector->vsi, tx_ring->q_index, + NETDEV_QUEUE_TYPE_TX, &q_vector->napi); /* Also set the interrupt number for the NAPI */ netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); } @@ -2953,11 +3006,10 @@ void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) /** * ice_vsi_set_napi_queues * @vsi: VSI pointer - * @locked: is the rtnl_lock already held * * Associate queue[s] with napi for all vectors */ -void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked) +void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { int i; @@ -2965,7 +3017,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked) return; ice_for_each_q_vector(vsi, i) - ice_q_vector_set_napi_queues(vsi->q_vectors[i], locked); + ice_q_vector_set_napi_queues(vsi->q_vectors[i]); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 71bd27244941..bfcfc582a4c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -91,9 +91,15 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); +void +ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi); + +void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); + +void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector); -void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked); +void ice_vsi_set_napi_queues(struct ice_vsi *vsi); int ice_vsi_release(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index dd4a9bc0dfdc..59c7e37f175f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3495,7 +3495,7 @@ static void ice_napi_add(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) { netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, ice_napi_poll); - ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); + __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); } } @@ -5447,6 +5447,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + ice_vsi_set_napi_queues(pf->vsi[v]); } ret = ice_req_irq_msix_misc(pf); diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 319c544b9f04..f94570556120 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -957,7 +957,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1003,6 +1003,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1022,7 +1023,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 10a9d80db32c..6ba8d4aca0a0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -93,6 +93,7 @@ static void ionic_unmap_bars(struct ionic *ionic) bars[i].len = 0; } } + ionic->num_bars = 0; } void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num) @@ -215,13 +216,15 @@ out: static void ionic_clear_pci(struct ionic *ionic) { - ionic->idev.dev_info_regs = NULL; - ionic->idev.dev_cmd_regs = NULL; - ionic->idev.intr_status = NULL; - ionic->idev.intr_ctrl = NULL; - - ionic_unmap_bars(ionic); - pci_release_regions(ionic->pdev); + if (ionic->num_bars) { + ionic->idev.dev_info_regs = NULL; + ionic->idev.dev_cmd_regs = NULL; + ionic->idev.intr_status = NULL; + ionic->idev.intr_ctrl = NULL; + + ionic_unmap_bars(ionic); + pci_release_regions(ionic->pdev); + } if (pci_is_enabled(ionic->pdev)) pci_disable_device(ionic->pdev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 1e7c71f7f081..746072b4dbd0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -319,22 +319,32 @@ do_check_time: u8 ionic_dev_cmd_status(struct ionic_dev *idev) { + if (!idev->dev_cmd_regs) + return (u8)PCI_ERROR_RESPONSE; return ioread8(&idev->dev_cmd_regs->comp.comp.status); } bool ionic_dev_cmd_done(struct ionic_dev *idev) { + if (!idev->dev_cmd_regs) + return false; return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE; } void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp) { + if (!idev->dev_cmd_regs) + return; memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp)); } void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) { idev->opcode = cmd->cmd.opcode; + + if (!idev->dev_cmd_regs) + return; + memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd)); iowrite32(0, &idev->dev_cmd_regs->done); iowrite32(1, &idev->dev_cmd_regs->doorbell); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index cd3c0b01402e..0ffc9c4904ac 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -90,18 +90,23 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev; unsigned int offset; unsigned int size; regs->version = IONIC_DEV_CMD_REG_VERSION; + idev = &lif->ionic->idev; + if (!idev->dev_info_regs) + return; + offset = 0; size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32); memcpy_fromio(p + offset, lif->ionic->idev.dev_info_regs->words, size); offset += size; size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32); - memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size); + memcpy_fromio(p + offset, idev->dev_cmd_regs->words, size); } static void ionic_get_link_ext_stats(struct net_device *netdev, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_fw.c b/drivers/net/ethernet/pensando/ionic/ionic_fw.c index 5f40324cd243..3c209c1a2337 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_fw.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_fw.c @@ -109,6 +109,11 @@ int ionic_firmware_update(struct ionic_lif *lif, const struct firmware *fw, dl = priv_to_devlink(ionic); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); + if (!idev->dev_cmd_regs) { + err = -ENXIO; + goto err_out; + } + buf_sz = sizeof(idev->dev_cmd_regs->data); netdev_dbg(netdev, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index cf2d5ad7b68c..fcb44ceeb6aa 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3559,7 +3559,10 @@ int ionic_lif_init(struct ionic_lif *lif) goto err_out_notifyq_deinit; } - err = ionic_init_nic_features(lif); + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + err = ionic_set_nic_features(lif, lif->netdev->features); + else + err = ionic_init_nic_features(lif); if (err) goto err_out_notifyq_deinit; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 165ab08ad2dd..2f479de329fe 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -416,6 +416,9 @@ static void ionic_dev_cmd_clean(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; + if (!idev->dev_cmd_regs) + return; + iowrite32(0, &idev->dev_cmd_regs->doorbell); memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e80d77bd9f1f..7c6aef033a45 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2672,7 +2672,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, } if (skb) { stmmac_get_tx_hwtstamp(priv, p, skb); - } else { + } else if (tx_q->xsk_pool && + xp_tx_metadata_enabled(tx_q->xsk_pool)) { struct stmmac_xsk_tx_complete tx_compl = { .priv = priv, .desc = p, @@ -4005,8 +4006,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) { set_bit(__FPE_REMOVING, &priv->fpe_task_state); - if (priv->fpe_wq) + if (priv->fpe_wq) { destroy_workqueue(priv->fpe_wq); + priv->fpe_wq = NULL; + } netdev_info(priv->dev, "FPE workqueue stop"); } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index d5b75af163d3..c1b0d35c8d05 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -384,18 +384,18 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) dev_info(ctodev(card), "%s: ERROR status\n", __func__); - descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); - if (!descr->skb) { - descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ - return -ENOMEM; - } descr->hw_regs.dmac_cmd_status = 0; descr->hw_regs.result_size = 0; descr->hw_regs.valid_size = 0; descr->hw_regs.data_error = 0; descr->hw_regs.payload.dev_addr = 0; descr->hw_regs.payload.size = 0; - descr->skb = NULL; + + descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); + if (!descr->skb) { + descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ + return -ENOMEM; + } offset = ((unsigned long)descr->skb->data) & (GELIC_NET_RXBUF_ALIGN - 1); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 2129ae42c703..2b5357d94ff5 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1903,26 +1903,26 @@ static int __init gtp_init(void) get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(>p_link_ops); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto error_out; - err = register_pernet_subsys(>p_net_ops); + err = rtnl_link_register(>p_link_ops); if (err < 0) - goto unreg_rtnl_link; + goto unreg_pernet_subsys; err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_pernet_subsys; + goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_pernet_subsys: - unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4a4f8c8e79fa..8f95a562b8d0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a972d..8b6d6a1b3c2e 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a6d653ff552a..ba6c8ac2a736 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; @@ -3033,7 +3035,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index a530f20ee257..2fa46baa589e 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2105,6 +2105,11 @@ static const struct usb_device_id products[] = { .driver_info = (unsigned long) &smsc95xx_info, }, { + /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */ + USB_DEVICE(0x0878, 0x1400), + .driver_info = (unsigned long)&smsc95xx_info, + }, + { /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ USB_DEVICE(0x184F, 0x0051), .driver_info = (unsigned long)&smsc95xx_info, diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 578e36ea1589..cd4a6fe458f9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1208,14 +1208,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1235,18 +1227,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1478,7 +1461,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1494,7 +1478,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) @@ -1654,6 +1638,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding + * XDP is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1669,6 +1661,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h index 9c69d3674384..e6c0f928a6bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2019-2021, 2023 Intel Corporation + * Copyright (C) 2005-2014, 2019-2021, 2023-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -66,6 +66,16 @@ enum iwl_gen2_tx_fifo { IWL_GEN2_TRIG_TX_FIFO_VO, }; +enum iwl_bz_tx_fifo { + IWL_BZ_EDCA_TX_FIFO_BK, + IWL_BZ_EDCA_TX_FIFO_BE, + IWL_BZ_EDCA_TX_FIFO_VI, + IWL_BZ_EDCA_TX_FIFO_VO, + IWL_BZ_TRIG_TX_FIFO_BK, + IWL_BZ_TRIG_TX_FIFO_BE, + IWL_BZ_TRIG_TX_FIFO_VI, + IWL_BZ_TRIG_TX_FIFO_VO, +}; /** * enum iwl_tx_queue_cfg_actions - TXQ config options * @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 4582afb149d7..05b64176859e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1279,7 +1279,9 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, mvm->net_detect = true; } else { - struct iwl_wowlan_config_cmd wowlan_config_cmd = {}; + struct iwl_wowlan_config_cmd wowlan_config_cmd = { + .offloading_tid = 0, + }; wowlan_config_cmd.sta_id = mvmvif->deflink.ap_sta_id; @@ -1291,6 +1293,11 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, goto out_noreset; } + ret = iwl_mvm_sta_ensure_queue( + mvm, ap_sta->txq[wowlan_config_cmd.offloading_tid]); + if (ret) + goto out_noreset; + ret = iwl_mvm_get_wowlan_config(mvm, wowlan, &wowlan_config_cmd, vif, mvmvif, ap_sta); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index c4f96125cf33..25a5a31e63c2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -31,6 +31,17 @@ const u8 iwl_mvm_ac_to_gen2_tx_fifo[] = { IWL_GEN2_TRIG_TX_FIFO_BK, }; +const u8 iwl_mvm_ac_to_bz_tx_fifo[] = { + IWL_BZ_EDCA_TX_FIFO_VO, + IWL_BZ_EDCA_TX_FIFO_VI, + IWL_BZ_EDCA_TX_FIFO_BE, + IWL_BZ_EDCA_TX_FIFO_BK, + IWL_BZ_TRIG_TX_FIFO_VO, + IWL_BZ_TRIG_TX_FIFO_VI, + IWL_BZ_TRIG_TX_FIFO_BE, + IWL_BZ_TRIG_TX_FIFO_BK, +}; + struct iwl_mvm_mac_iface_iterator_data { struct iwl_mvm *mvm; struct ieee80211_vif *vif; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 40627961b834..81dbef6947f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1581,12 +1581,16 @@ static inline int iwl_mvm_max_active_links(struct iwl_mvm *mvm, extern const u8 iwl_mvm_ac_to_tx_fifo[]; extern const u8 iwl_mvm_ac_to_gen2_tx_fifo[]; +extern const u8 iwl_mvm_ac_to_bz_tx_fifo[]; static inline u8 iwl_mvm_mac_ac_to_tx_fifo(struct iwl_mvm *mvm, enum ieee80211_ac_numbers ac) { - return iwl_mvm_has_new_tx_api(mvm) ? - iwl_mvm_ac_to_gen2_tx_fifo[ac] : iwl_mvm_ac_to_tx_fifo[ac]; + if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) + return iwl_mvm_ac_to_bz_tx_fifo[ac]; + if (iwl_mvm_has_new_tx_api(mvm)) + return iwl_mvm_ac_to_gen2_tx_fifo[ac]; + return iwl_mvm_ac_to_tx_fifo[ac]; } struct iwl_rate_info { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 2a3ca9785974..c2e0cff740e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1502,6 +1502,34 @@ out_err: return ret; } +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, + struct ieee80211_txq *txq) +{ + struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); + int ret = -EINVAL; + + lockdep_assert_held(&mvm->mutex); + + if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) || + !txq->sta) { + return 0; + } + + if (!iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, txq->tid)) { + set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + ret = 0; + } + + local_bh_disable(); + spin_lock(&mvm->add_stream_lock); + if (!list_empty(&mvmtxq->list)) + list_del_init(&mvmtxq->list); + spin_unlock(&mvm->add_stream_lock); + local_bh_enable(); + + return ret; +} + void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) { struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index b33a0ce096d4..3cf8a70274ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2023 Intel Corporation + * Copyright (C) 2012-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2016 Intel Deutschland GmbH */ @@ -571,6 +571,7 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, bool disable); void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, struct ieee80211_txq *txq); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher, diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index 830a1c4cd705..8bbcd983a74a 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -29,8 +29,8 @@ struct max5970_regulator { }; enum max597x_regulator_id { - MAX597X_SW0, - MAX597X_SW1, + MAX597X_sw0, + MAX597X_sw1, }; static int max5970_read_adc(struct regmap *regmap, int reg, long *val) @@ -378,8 +378,8 @@ static int max597x_dt_parse(struct device_node *np, } static const struct regulator_desc regulators[] = { - MAX597X_SWITCH(SW0, MAX5970_REG_CHXEN, 0, "vss1"), - MAX597X_SWITCH(SW1, MAX5970_REG_CHXEN, 1, "vss2"), + MAX597X_SWITCH(sw0, MAX5970_REG_CHXEN, 0, "vss1"), + MAX597X_SWITCH(sw1, MAX5970_REG_CHXEN, 1, "vss2"), }; static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index f94e0d370d46..1a8d03958dff 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1927,24 +1927,18 @@ static void cqspi_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); } -static int cqspi_suspend(struct device *dev) +static int cqspi_runtime_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - struct spi_controller *host = dev_get_drvdata(dev); - int ret; - ret = spi_controller_suspend(host); cqspi_controller_enable(cqspi, 0); - clk_disable_unprepare(cqspi->clk); - - return ret; + return 0; } -static int cqspi_resume(struct device *dev) +static int cqspi_runtime_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); - struct spi_controller *host = dev_get_drvdata(dev); clk_prepare_enable(cqspi->clk); cqspi_wait_idle(cqspi); @@ -1952,12 +1946,27 @@ static int cqspi_resume(struct device *dev) cqspi->current_cs = -1; cqspi->sclk = 0; + return 0; +} + +static int cqspi_suspend(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + return spi_controller_suspend(cqspi->host); +} - return spi_controller_resume(host); +static int cqspi_resume(struct device *dev) +{ + struct cqspi_st *cqspi = dev_get_drvdata(dev); + + return spi_controller_resume(cqspi->host); } -static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend, - cqspi_resume, NULL); +static const struct dev_pm_ops cqspi_dev_pm_ops = { + RUNTIME_PM_OPS(cqspi_runtime_suspend, cqspi_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(cqspi_suspend, cqspi_resume) +}; static const struct cqspi_driver_platdata cdns_qspi = { .quirks = CQSPI_DISABLE_DAC_MODE, diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 942c3117ab3a..82d6264841fc 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -359,22 +359,22 @@ static int spi_ppc4xx_of_probe(struct platform_device *op) /* Setup the state for the bitbang driver */ bbp = &hw->bitbang; - bbp->ctlr = hw->host; + bbp->master = hw->host; bbp->setup_transfer = spi_ppc4xx_setupxfer; bbp->txrx_bufs = spi_ppc4xx_txrx; bbp->use_dma = 0; - bbp->ctlr->setup = spi_ppc4xx_setup; - bbp->ctlr->cleanup = spi_ppc4xx_cleanup; - bbp->ctlr->bits_per_word_mask = SPI_BPW_MASK(8); - bbp->ctlr->use_gpio_descriptors = true; + bbp->master->setup = spi_ppc4xx_setup; + bbp->master->cleanup = spi_ppc4xx_cleanup; + bbp->master->bits_per_word_mask = SPI_BPW_MASK(8); + bbp->master->use_gpio_descriptors = true; /* * The SPI core will count the number of GPIO descriptors to figure * out the number of chip selects available on the platform. */ - bbp->ctlr->num_chipselect = 0; + bbp->master->num_chipselect = 0; /* the spi->mode bits understood by this driver: */ - bbp->ctlr->mode_bits = + bbp->master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST; /* Get the clock for the OPB */ diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index ceb5f586a2d5..1043a8142351 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -494,7 +494,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); - if (unlikely(block_rsv->size == 0)) + if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve; again: ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index b0bd12b8652f..43a9a6b5a79f 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -101,4 +101,36 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) return data_race(rsv->full); } +/* + * Get the reserved mount of a block reserve in a context where getting a stale + * value is acceptable, instead of accessing it directly and trigger data race + * warning from KCSAN. + */ +static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->reserved; + spin_unlock(&rsv->lock); + + return ret; +} + +/* + * Get the size of a block reserve in a context where getting a stale value is + * acceptable, instead of accessing it directly and trigger data race warning + * from KCSAN. + */ +static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv) +{ + u64 ret; + + spin_lock(&rsv->lock); + ret = rsv->size; + spin_unlock(&rsv->lock); + + return ret; +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 1502d664c892..79c4293ddf37 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -725,6 +725,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -737,10 +754,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7902298c1f25..e48a063ef085 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6705,11 +6705,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 571bb13587d5..3b54eb583474 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -856,7 +856,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info) { - u64 global_rsv_size = fs_info->global_block_rsv.reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); u64 ordered, delalloc; u64 thresh; u64 used; @@ -956,8 +956,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1; delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes); if (ordered >= delalloc) - used += fs_info->delayed_refs_rsv.reserved + - fs_info->delayed_block_rsv.reserved; + used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) + + btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv); else used += space_info->bytes_may_use - global_rsv_size; @@ -1173,7 +1173,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) enum btrfs_flush_state flush; u64 delalloc_size = 0; u64 to_reclaim, block_rsv_size; - u64 global_rsv_size = global_rsv->reserved; + const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); loops++; @@ -1185,9 +1185,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * assume it's tied up in delalloc reservations. */ block_rsv_size = global_rsv_size + - delayed_block_rsv->reserved + - delayed_refs_rsv->reserved + - trans_rsv->reserved; + btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv) + + btrfs_block_rsv_reserved(trans_rsv); if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; @@ -1207,16 +1207,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) to_reclaim = delalloc_size; flush = FLUSH_DELALLOC; } else if (space_info->bytes_pinned > - (delayed_block_rsv->reserved + - delayed_refs_rsv->reserved)) { + (btrfs_block_rsv_reserved(delayed_block_rsv) + + btrfs_block_rsv_reserved(delayed_refs_rsv))) { to_reclaim = space_info->bytes_pinned; flush = COMMIT_TRANS; - } else if (delayed_block_rsv->reserved > - delayed_refs_rsv->reserved) { - to_reclaim = delayed_block_rsv->reserved; + } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > + btrfs_block_rsv_reserved(delayed_refs_rsv)) { + to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv); flush = FLUSH_DELAYED_ITEMS_NR; } else { - to_reclaim = delayed_refs_rsv->reserved; + to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv); flush = FLUSH_DELAYED_REFS_NR; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3a5d69ff25fc..5f750fa53a2b 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1639,6 +1639,15 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } out: + /* Reject non SINGLE data profiles without RST */ + if ((map->type & BTRFS_BLOCK_GROUP_DATA) && + (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + !fs_info->stripe_root) { + btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", + btrfs_bg_type_to_raid_name(map->type)); + return -EINVAL; + } + if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 3b42938a9d3b..7f27382e0ce2 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2457,7 +2457,6 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, struct ATTR_LIST_ENTRY *le = NULL; struct runs_tree *run = &ni->file.run; u64 valid_size = ni->i_valid; - loff_t i_size = i_size_read(&ni->vfs_inode); u64 vbo_disk; size_t unc_size; u32 frame_size, i, npages_disk, ondisk_size; @@ -2509,6 +2508,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, err = -EOPNOTSUPP; goto out1; #else + loff_t i_size = i_size_read(&ni->vfs_inode); u32 frame_bits = ni_ext_compress_bits(ni); u64 frame64 = frame_vbo >> frame_bits; u64 frames, vbo_data; diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 9cf896ea1d41..c60591308ae8 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -10,6 +10,8 @@ #include <uapi/linux/dpll.h> #include <linux/device.h> #include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> struct dpll_device; struct dpll_pin; @@ -167,4 +169,13 @@ int dpll_device_change_ntf(struct dpll_device *dpll); int dpll_pin_change_ntf(struct dpll_pin *pin); +#if !IS_ENABLED(CONFIG_DPLL) +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ + return NULL; +} +#else +struct dpll_pin *netdev_dpll_pin(const struct net_device *dev); +#endif + #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef7bfbb98497..a9c973b92294 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2469,7 +2469,7 @@ struct net_device { struct devlink_port *devlink_port; #if IS_ENABLED(CONFIG_DPLL) - struct dpll_pin *dpll_pin; + struct dpll_pin __rcu *dpll_pin; #endif #if IS_ENABLED(CONFIG_PAGE_POOL) /** @page_pools: page pools created for this netdevice */ @@ -4035,15 +4035,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); void netdev_dpll_pin_clear(struct net_device *dev); -static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_DPLL) - return dev->dpll_pin; -#else - return NULL; -#endif -} - struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 80900d910992..ce660d51549b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -474,6 +474,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/linux/poison.h b/include/linux/poison.h index 27a7dad17eef..1f0ee2459f2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -92,4 +92,7 @@ /********** VFS **********/ #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) +/********** lib/stackdepot.c **********/ +#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) + #endif diff --git a/include/net/mctp.h b/include/net/mctp.h index da86e106c91d..2bff5f47ce82 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -249,6 +249,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index c4c53a9ab959..ff8d21f9e95b 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,7 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_IOAM 49 /* RFC 9486 */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 6cd2a4e3afb8..9ff018245840 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -189,9 +189,6 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) { int size; - if (num <= 0) - return -EINVAL; - if (!fp->exit_handler) { fp->rethook = NULL; return 0; @@ -199,15 +196,16 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) /* Initialize rethook if needed */ if (fp->nr_maxactive) - size = fp->nr_maxactive; + num = fp->nr_maxactive; else - size = num * num_possible_cpus() * 2; - if (size <= 0) + num *= num_possible_cpus() * 2; + if (num <= 0) return -EINVAL; + size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size; + /* Initialize rethook */ - fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, - sizeof(struct fprobe_rethook_node), size); + fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num); if (IS_ERR(fp->rethook)) return PTR_ERR(fp->rethook); diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 225bb7701460..bf70850035c7 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -215,7 +215,7 @@ static const u32 init_sums_no_overflow[] = { 0xffff0000, 0xfffffffb, }; -static const __sum16 expected_csum_ipv6_magic[] = { +static const u16 expected_csum_ipv6_magic[] = { 0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f, 0x1034, 0x8422, 0x6fc0, 0xd2f6, 0xbeb5, 0x9d3, 0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e, 0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d, @@ -241,7 +241,7 @@ static const __sum16 expected_csum_ipv6_magic[] = { 0x3845, 0x1014 }; -static const __sum16 expected_fast_csum[] = { +static const u16 expected_fast_csum[] = { 0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e, 0xe902, 0xa5e9, 0x87a5, 0x7187, 0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a, 0xedbe, 0xabee, 0x6aac, 0xe6b, 0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a, @@ -577,7 +577,8 @@ static void test_csum_no_carry_inputs(struct kunit *test) static void test_ip_fast_csum(struct kunit *test) { - __sum16 csum_result, expected; + __sum16 csum_result; + u16 expected; for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) { for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) { @@ -586,7 +587,7 @@ static void test_ip_fast_csum(struct kunit *test) expected_fast_csum[(len - IPv4_MIN_WORDS) * NUM_IP_FAST_CSUM_TESTS + index]; - CHECK_EQ(expected, csum_result); + CHECK_EQ(to_sum16(expected), csum_result); } } } @@ -598,7 +599,7 @@ static void test_csum_ipv6_magic(struct kunit *test) const struct in6_addr *daddr; unsigned int len; unsigned char proto; - unsigned int csum; + __wsum csum; const int daddr_offset = sizeof(struct in6_addr); const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr); @@ -611,10 +612,10 @@ static void test_csum_ipv6_magic(struct kunit *test) saddr = (const struct in6_addr *)(random_buf + i); daddr = (const struct in6_addr *)(random_buf + i + daddr_offset); - len = *(unsigned int *)(random_buf + i + len_offset); + len = le32_to_cpu(*(__le32 *)(random_buf + i + len_offset)); proto = *(random_buf + i + proto_offset); - csum = *(unsigned int *)(random_buf + i + csum_offset); - CHECK_EQ(expected_csum_ipv6_magic[i], + csum = *(__wsum *)(random_buf + i + csum_offset); + CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]), csum_ipv6_magic(saddr, daddr, len, proto, csum)); } #endif /* !CONFIG_NET */ diff --git a/lib/nlattr.c b/lib/nlattr.c index ed2ab43e1b22..be9c576b6e2d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { @@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; /* diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 5caa1f566553..4a7055a63d9f 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -22,6 +22,7 @@ #include <linux/list.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/poison.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -43,17 +44,7 @@ #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ STACK_DEPOT_EXTRA_BITS) -#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32 -/* - * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack - * traces. As KMSAN does not support evicting stack traces from the stack - * depot, the stack depot capacity might be reached quickly with large stack - * records. Adjust the maximum number of stack depot pools for this case. - */ -#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16)) -#else #define DEPOT_POOLS_CAP 8192 -#endif #define DEPOT_MAX_POOLS \ (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) @@ -93,9 +84,6 @@ struct stack_record { }; }; -#define DEPOT_STACK_RECORD_SIZE \ - ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN) - static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; @@ -121,32 +109,31 @@ static void *stack_pools[DEPOT_MAX_POOLS]; static void *new_pool; /* Number of pools in stack_pools. */ static int pools_num; +/* Offset to the unused space in the currently used pool. */ +static size_t pool_offset = DEPOT_POOL_SIZE; /* Freelist of stack records within stack_pools. */ static LIST_HEAD(free_stacks); -/* - * Stack depot tries to keep an extra pool allocated even before it runs out - * of space in the currently used pool. This flag marks whether this extra pool - * needs to be allocated. It has the value 0 when either an extra pool is not - * yet allocated or if the limit on the number of pools is reached. - */ -static bool new_pool_required = true; /* The lock must be held when performing pool or freelist modifications. */ static DEFINE_RAW_SPINLOCK(pool_lock); /* Statistics counters for debugfs. */ enum depot_counter_id { - DEPOT_COUNTER_ALLOCS, - DEPOT_COUNTER_FREES, - DEPOT_COUNTER_INUSE, + DEPOT_COUNTER_REFD_ALLOCS, + DEPOT_COUNTER_REFD_FREES, + DEPOT_COUNTER_REFD_INUSE, DEPOT_COUNTER_FREELIST_SIZE, + DEPOT_COUNTER_PERSIST_COUNT, + DEPOT_COUNTER_PERSIST_BYTES, DEPOT_COUNTER_COUNT, }; static long counters[DEPOT_COUNTER_COUNT]; static const char *const counter_names[] = { - [DEPOT_COUNTER_ALLOCS] = "allocations", - [DEPOT_COUNTER_FREES] = "frees", - [DEPOT_COUNTER_INUSE] = "in_use", + [DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations", + [DEPOT_COUNTER_REFD_FREES] = "refcounted_frees", + [DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use", [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", + [DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count", + [DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes", }; static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); @@ -294,48 +281,52 @@ out_unlock: EXPORT_SYMBOL_GPL(stack_depot_init); /* - * Initializes new stack depot @pool, release all its entries to the freelist, - * and update the list of pools. + * Initializes new stack pool, and updates the list of pools. */ -static void depot_init_pool(void *pool) +static bool depot_init_pool(void **prealloc) { - int offset; - lockdep_assert_held(&pool_lock); - /* Initialize handles and link stack records into the freelist. */ - for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; - offset += DEPOT_STACK_RECORD_SIZE) { - struct stack_record *stack = pool + offset; - - stack->handle.pool_index = pools_num; - stack->handle.offset = offset >> DEPOT_STACK_ALIGN; - stack->handle.extra = 0; - - /* - * Stack traces of size 0 are never saved, and we can simply use - * the size field as an indicator if this is a new unused stack - * record in the freelist. - */ - stack->size = 0; + if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + /* Bail out if we reached the pool limit. */ + WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ + WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ + WARN_ONCE(1, "Stack depot reached limit capacity"); + return false; + } - INIT_LIST_HEAD(&stack->hash_list); - /* - * Add to the freelist front to prioritize never-used entries: - * required in case there are entries in the freelist, but their - * RCU cookie still belongs to the current RCU grace period - * (there can still be concurrent readers). - */ - list_add(&stack->free_list, &free_stacks); - counters[DEPOT_COUNTER_FREELIST_SIZE]++; + if (!new_pool && *prealloc) { + /* We have preallocated memory, use it. */ + WRITE_ONCE(new_pool, *prealloc); + *prealloc = NULL; } + if (!new_pool) + return false; /* new_pool and *prealloc are NULL */ + /* Save reference to the pool to be used by depot_fetch_stack(). */ - stack_pools[pools_num] = pool; + stack_pools[pools_num] = new_pool; + + /* + * Stack depot tries to keep an extra pool allocated even before it runs + * out of space in the currently used pool. + * + * To indicate that a new preallocation is needed new_pool is reset to + * NULL; do not reset to NULL if we have reached the maximum number of + * pools. + */ + if (pools_num < DEPOT_MAX_POOLS) + WRITE_ONCE(new_pool, NULL); + else + WRITE_ONCE(new_pool, STACK_DEPOT_POISON); /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ WRITE_ONCE(pools_num, pools_num + 1); ASSERT_EXCLUSIVE_WRITER(pools_num); + + pool_offset = 0; + + return true; } /* Keeps the preallocated memory to be used for a new stack depot pool. */ @@ -347,63 +338,51 @@ static void depot_keep_new_pool(void **prealloc) * If a new pool is already saved or the maximum number of * pools is reached, do not use the preallocated memory. */ - if (!new_pool_required) + if (new_pool) return; - /* - * Use the preallocated memory for the new pool - * as long as we do not exceed the maximum number of pools. - */ - if (pools_num < DEPOT_MAX_POOLS) { - new_pool = *prealloc; - *prealloc = NULL; - } - - /* - * At this point, either a new pool is kept or the maximum - * number of pools is reached. In either case, take note that - * keeping another pool is not required. - */ - WRITE_ONCE(new_pool_required, false); + WRITE_ONCE(new_pool, *prealloc); + *prealloc = NULL; } /* - * Try to initialize a new stack depot pool from either a previous or the - * current pre-allocation, and release all its entries to the freelist. + * Try to initialize a new stack record from the current pool, a cached pool, or + * the current pre-allocation. */ -static bool depot_try_init_pool(void **prealloc) +static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) { + struct stack_record *stack; + void *current_pool; + u32 pool_index; + lockdep_assert_held(&pool_lock); - /* Check if we have a new pool saved and use it. */ - if (new_pool) { - depot_init_pool(new_pool); - new_pool = NULL; + if (pool_offset + size > DEPOT_POOL_SIZE) { + if (!depot_init_pool(prealloc)) + return NULL; + } - /* Take note that we might need a new new_pool. */ - if (pools_num < DEPOT_MAX_POOLS) - WRITE_ONCE(new_pool_required, true); + if (WARN_ON_ONCE(pools_num < 1)) + return NULL; + pool_index = pools_num - 1; + current_pool = stack_pools[pool_index]; + if (WARN_ON_ONCE(!current_pool)) + return NULL; - return true; - } + stack = current_pool + pool_offset; - /* Bail out if we reached the pool limit. */ - if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { - WARN_ONCE(1, "Stack depot reached limit capacity"); - return false; - } + /* Pre-initialize handle once. */ + stack->handle.pool_index = pool_index; + stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; + stack->handle.extra = 0; + INIT_LIST_HEAD(&stack->hash_list); - /* Check if we have preallocated memory and use it. */ - if (*prealloc) { - depot_init_pool(*prealloc); - *prealloc = NULL; - return true; - } + pool_offset += size; - return false; + return stack; } -/* Try to find next free usable entry. */ +/* Try to find next free usable entry from the freelist. */ static struct stack_record *depot_pop_free(void) { struct stack_record *stack; @@ -420,7 +399,7 @@ static struct stack_record *depot_pop_free(void) * check the first entry. */ stack = list_first_entry(&free_stacks, struct stack_record, free_list); - if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) + if (!poll_state_synchronize_rcu(stack->rcu_state)) return NULL; list_del(&stack->free_list); @@ -429,48 +408,73 @@ static struct stack_record *depot_pop_free(void) return stack; } +static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries) +{ + const size_t used = flex_array_size(s, entries, nr_entries); + const size_t unused = sizeof(s->entries) - used; + + WARN_ON_ONCE(sizeof(s->entries) < used); + + return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN); +} + /* Allocates a new stack in a stack depot pool. */ static struct stack_record * -depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc) { - struct stack_record *stack; + struct stack_record *stack = NULL; + size_t record_size; lockdep_assert_held(&pool_lock); /* This should already be checked by public API entry points. */ - if (WARN_ON_ONCE(!size)) + if (WARN_ON_ONCE(!nr_entries)) return NULL; - /* Check if we have a stack record to save the stack trace. */ - stack = depot_pop_free(); - if (!stack) { - /* No usable entries on the freelist - try to refill the freelist. */ - if (!depot_try_init_pool(prealloc)) - return NULL; + /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ + if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES) + nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES; + + if (flags & STACK_DEPOT_FLAG_GET) { + /* + * Evictable entries have to allocate the max. size so they may + * safely be re-used by differently sized allocations. + */ + record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES); stack = depot_pop_free(); - if (WARN_ON(!stack)) - return NULL; + } else { + record_size = depot_stack_record_size(stack, nr_entries); } - /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ - if (size > CONFIG_STACKDEPOT_MAX_FRAMES) - size = CONFIG_STACKDEPOT_MAX_FRAMES; + if (!stack) { + stack = depot_pop_free_pool(prealloc, record_size); + if (!stack) + return NULL; + } /* Save the stack trace. */ stack->hash = hash; - stack->size = size; - /* stack->handle is already filled in by depot_init_pool(). */ - refcount_set(&stack->count, 1); - memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); + stack->size = nr_entries; + /* stack->handle is already filled in by depot_pop_free_pool(). */ + memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries)); + + if (flags & STACK_DEPOT_FLAG_GET) { + refcount_set(&stack->count, 1); + counters[DEPOT_COUNTER_REFD_ALLOCS]++; + counters[DEPOT_COUNTER_REFD_INUSE]++; + } else { + /* Warn on attempts to switch to refcounting this entry. */ + refcount_set(&stack->count, REFCOUNT_SATURATED); + counters[DEPOT_COUNTER_PERSIST_COUNT]++; + counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size; + } /* * Let KMSAN know the stored stack record is initialized. This shall * prevent false positive reports if instrumented code accesses it. */ - kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); + kmsan_unpoison_memory(stack, record_size); - counters[DEPOT_COUNTER_ALLOCS]++; - counters[DEPOT_COUNTER_INUSE]++; return stack; } @@ -538,8 +542,8 @@ static void depot_free_stack(struct stack_record *stack) list_add_tail(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; - counters[DEPOT_COUNTER_FREES]++; - counters[DEPOT_COUNTER_INUSE]--; + counters[DEPOT_COUNTER_REFD_FREES]++; + counters[DEPOT_COUNTER_REFD_INUSE]--; printk_deferred_exit(); raw_spin_unlock_irqrestore(&pool_lock, flags); @@ -660,7 +664,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, * Allocate memory for a new pool if required now: * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { + if (unlikely(can_alloc && !READ_ONCE(new_pool))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -681,7 +685,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, found = find_stack(bucket, entries, nr_entries, hash, depot_flags); if (!found) { struct stack_record *new = - depot_alloc_stack(entries, nr_entries, hash, &prealloc); + depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc); if (new) { /* diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 5662e29fe253..65c19025da3d 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) vaddr &= HPAGE_PUD_MASK; pud = pfn_pud(args->pud_pfn, args->page_prot); + /* + * Some architectures have debug checks to make sure + * huge pud mapping are only found with devmap entries + * For now test with only devmap entries. + */ + pud = pud_mkdevmap(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); flush_dcache_page(page); pudp_set_wrprotect(args->mm, vaddr, args->pudp); @@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) WARN_ON(!pud_none(pud)); #endif /* __PAGETABLE_PMD_FOLDED */ pud = pfn_pud(args->pud_pfn, args->page_prot); + pud = pud_mkdevmap(pud); pud = pud_wrprotect(pud); pud = pud_mkclean(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); @@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) #endif /* __PAGETABLE_PMD_FOLDED */ pud = pfn_pud(args->pud_pfn, args->page_prot); + pud = pud_mkdevmap(pud); pud = pud_mkyoung(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); flush_dcache_page(page); diff --git a/mm/filemap.c b/mm/filemap.c index 750e779c23db..4a30de98a8c7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4111,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping, rcu_read_lock(); xas_for_each(&xas, folio, last_index) { + int order; unsigned long nr_pages; pgoff_t folio_first_index, folio_last_index; + /* + * Don't deref the folio. It is not pinned, and might + * get freed (and reused) underneath us. + * + * We *could* pin it, but that would be expensive for + * what should be a fast and lightweight syscall. + * + * Instead, derive all information of interest from + * the rcu-protected xarray. + */ + if (xas_retry(&xas, folio)) continue; + order = xa_get_order(xas.xa, xas.xa_index); + nr_pages = 1 << order; + folio_first_index = round_down(xas.xa_index, 1 << order); + folio_last_index = folio_first_index + nr_pages - 1; + + /* Folios might straddle the range boundaries, only count covered pages */ + if (folio_first_index < first_index) + nr_pages -= first_index - folio_first_index; + + if (folio_last_index > last_index) + nr_pages -= folio_last_index - last_index; + if (xa_is_value(folio)) { /* page is evicted */ void *shadow = (void *)folio; bool workingset; /* not used */ - int order = xa_get_order(xas.xa, xas.xa_index); - - nr_pages = 1 << order; - folio_first_index = round_down(xas.xa_index, 1 << order); - folio_last_index = folio_first_index + nr_pages - 1; - - /* Folios might straddle the range boundaries, only count covered pages */ - if (folio_first_index < first_index) - nr_pages -= first_index - folio_first_index; - - if (folio_last_index > last_index) - nr_pages -= folio_last_index - last_index; cs->nr_evicted += nr_pages; @@ -4150,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping, goto resched; } - nr_pages = folio_nr_pages(folio); - folio_first_index = folio_pgoff(folio); - folio_last_index = folio_first_index + nr_pages - 1; - - /* Folios might straddle the range boundaries, only count covered pages */ - if (folio_first_index < first_index) - nr_pages -= first_index - folio_first_index; - - if (folio_last_index > last_index) - nr_pages -= folio_last_index - last_index; - /* page is in cache */ cs->nr_cache += nr_pages; - if (folio_test_dirty(folio)) + if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY)) cs->nr_dirty += nr_pages; - if (folio_test_writeback(folio)) + if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK)) cs->nr_writeback += nr_pages; resched: diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 610efae91220..6ca63e8dda74 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags) { depot_stack_handle_t stack; - stack = kasan_save_stack(flags, - STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET); + stack = kasan_save_stack(flags, STACK_DEPOT_FLAG_CAN_ALLOC); kasan_set_track(track, stack); } @@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, return true; /* - * If the object is not put into quarantine, it will likely be quickly - * reallocated. Thus, release its metadata now. + * Note: Keep per-object metadata to allow KASAN print stack traces for + * use-after-free-before-realloc bugs. */ - kasan_release_object_meta(cache, object); /* Let slab put the object onto the freelist. */ return false; diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 032bf3e98c24..1900f8576034 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -485,16 +485,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object) if (alloc_meta) { /* Zero out alloc meta to mark it as invalid. */ __memset(alloc_meta, 0, sizeof(*alloc_meta)); - - /* - * Prepare the lock for saving auxiliary stack traces. - * Temporarily disable KASAN bug reporting to allow instrumented - * raw_spin_lock_init to access aux_lock, which resides inside - * of a redzone. - */ - kasan_disable_current(); - raw_spin_lock_init(&alloc_meta->aux_lock); - kasan_enable_current(); } /* @@ -506,18 +496,8 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object) static void release_alloc_meta(struct kasan_alloc_meta *meta) { - /* Evict the stack traces from stack depot. */ - stack_depot_put(meta->alloc_track.stack); - stack_depot_put(meta->aux_stack[0]); - stack_depot_put(meta->aux_stack[1]); - - /* - * Zero out alloc meta to mark it as invalid but keep aux_lock - * initialized to avoid having to reinitialize it when another object - * is allocated in the same slot. - */ - __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track)); - __memset(meta->aux_stack, 0, sizeof(meta->aux_stack)); + /* Zero out alloc meta to mark it as invalid. */ + __memset(meta, 0, sizeof(*meta)); } static void release_free_meta(const void *object, struct kasan_free_meta *meta) @@ -529,27 +509,10 @@ static void release_free_meta(const void *object, struct kasan_free_meta *meta) if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META) return; - /* Evict the stack trace from the stack depot. */ - stack_depot_put(meta->free_track.stack); - /* Mark free meta as invalid. */ *(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE; } -void kasan_release_object_meta(struct kmem_cache *cache, const void *object) -{ - struct kasan_alloc_meta *alloc_meta; - struct kasan_free_meta *free_meta; - - alloc_meta = kasan_get_alloc_meta(cache, object); - if (alloc_meta) - release_alloc_meta(alloc_meta); - - free_meta = kasan_get_free_meta(cache, object); - if (free_meta) - release_free_meta(object, free_meta); -} - size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object) { struct kasan_cache *info = &cache->kasan_info; @@ -574,8 +537,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags) struct kmem_cache *cache; struct kasan_alloc_meta *alloc_meta; void *object; - depot_stack_handle_t new_handle, old_handle; - unsigned long flags; if (is_kfence_address(addr) || !slab) return; @@ -586,33 +547,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags) if (!alloc_meta) return; - new_handle = kasan_save_stack(0, depot_flags); - - /* - * Temporarily disable KASAN bug reporting to allow instrumented - * spinlock functions to access aux_lock, which resides inside of a - * redzone. - */ - kasan_disable_current(); - raw_spin_lock_irqsave(&alloc_meta->aux_lock, flags); - old_handle = alloc_meta->aux_stack[1]; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = new_handle; - raw_spin_unlock_irqrestore(&alloc_meta->aux_lock, flags); - kasan_enable_current(); - - stack_depot_put(old_handle); + alloc_meta->aux_stack[0] = kasan_save_stack(0, depot_flags); } void kasan_record_aux_stack(void *addr) { - return __kasan_record_aux_stack(addr, - STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET); + return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_CAN_ALLOC); } void kasan_record_aux_stack_noalloc(void *addr) { - return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_GET); + return __kasan_record_aux_stack(addr, 0); } void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) @@ -623,7 +569,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) if (!alloc_meta) return; - /* Evict previous stack traces (might exist for krealloc or mempool). */ + /* Invalidate previous stack traces (might exist for krealloc or mempool). */ release_alloc_meta(alloc_meta); kasan_save_track(&alloc_meta->alloc_track, flags); @@ -637,7 +583,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object) if (!free_meta) return; - /* Evict previous stack trace (might exist for mempool). */ + /* Invalidate previous stack trace (might exist for mempool). */ release_free_meta(object, free_meta); kasan_save_track(&free_meta->free_track, 0); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d0f172f2b978..fb2b9ac0659a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -6,7 +6,6 @@ #include <linux/kasan.h> #include <linux/kasan-tags.h> #include <linux/kfence.h> -#include <linux/spinlock.h> #include <linux/stackdepot.h> #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) @@ -265,13 +264,6 @@ struct kasan_global { struct kasan_alloc_meta { struct kasan_track alloc_track; /* Free track is stored in kasan_free_meta. */ - /* - * aux_lock protects aux_stack from accesses from concurrent - * kasan_record_aux_stack calls. It is a raw spinlock to avoid sleeping - * on RT kernels, as kasan_record_aux_stack_noalloc can be called from - * non-sleepable contexts. - */ - raw_spinlock_t aux_lock; depot_stack_handle_t aux_stack[2]; }; @@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, const void *object); void kasan_init_object_meta(struct kmem_cache *cache, const void *object); -void kasan_release_object_meta(struct kmem_cache *cache, const void *object); #else static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { } -static inline void kasan_release_object_meta(struct kmem_cache *cache, const void *object) { } #endif depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags); diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 3ba02efb952a..6958aa713c67 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) void *object = qlink_to_object(qlink, cache); struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object); - kasan_release_object_meta(cache, object); + /* + * Note: Keep per-object metadata to allow KASAN print stack traces for + * use-after-free-before-realloc bugs. + */ /* * If init_on_free is enabled and KASAN's free metadata is stored in diff --git a/mm/migrate.c b/mm/migrate.c index cc9f2bcd73b4..c27b1f8097d4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2519,6 +2519,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio) if (managed_zone(pgdat->node_zones + z)) break; } + + /* + * If there are no managed zones, it should not proceed + * further. + */ + if (z < 0) + return 0; + wakeup_kswapd(pgdat->node_zones + z, 0, folio_order(folio), ZONE_MOVABLE); return 0; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 65601aa52e0d..2821a42cefdc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ef8c3bed7361..2a5f5a7d2412 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5329,9 +5329,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn || !hci_conn_ssp_enabled(conn)) + if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) goto unlock; + /* Assume remote supports SSP since it has triggered this event */ + set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + hci_conn_hold(conn); if (!hci_dev_test_flag(hdev, HCI_MGMT)) @@ -6794,6 +6797,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); + if (max > hcon->le_conn_max_interval) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); @@ -7420,10 +7427,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, * keep track of the bdaddr of the connection event that woke us up. */ if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + bacpy(&hdev->wake_addr, &conn_request->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); + bacpy(&hdev->wake_addr, &conn_complete->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_LE_META) { struct hci_ev_le_meta *le_ev = (void *)skb->data; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a6fc8a2a5c67..5716345a26df 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2206,8 +2206,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) + !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { + hci_le_del_accept_list_sync(hdev, ¶ms->addr, + params->addr_type); return 0; + } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) @@ -5559,7 +5562,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) bt_dev_dbg(hdev, ""); - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) + if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 60298975d5c4..656f49b299d2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5613,7 +5613,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = hci_check_conn_params(min, max, latency, to_multiplier); + if (max > hcon->le_conn_max_interval) { + BT_DBG("requested connection interval exceeds current bounds."); + err = -EINVAL; + } else { + err = hci_check_conn_params(min, max, latency, to_multiplier); + } + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bb72ff6eb22f..ee3b4aad8bd8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1045,6 +1045,8 @@ static void rpa_expired(struct work_struct *work) hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } +static int set_discoverable_sync(struct hci_dev *hdev, void *data); + static void discov_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1063,7 +1065,7 @@ static void discov_off(struct work_struct *work) hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; - hci_update_discoverable(hdev); + hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL); mgmt_new_settings(hdev); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 053ef8f25fae..1d34d8497033 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) { + if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) { s = rfcomm_recv_frame(s, skb); if (!s) break; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index ed1720890757..35e10c5a766d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -43,6 +43,10 @@ #include <linux/sysctl.h> #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack_core.h> +#endif + static unsigned int brnf_net_id __read_mostly; struct brnf_net { @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_STOLEN; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +/* conntracks' nf_confirm logic cannot handle cloned skbs referencing + * the same nf_conn entry, which will happen for multicast (broadcast) + * Frames on bridges. + * + * Example: + * macvlan0 + * br0 + * ethX ethY + * + * ethX (or Y) receives multicast or broadcast packet containing + * an IP packet, not yet in conntrack table. + * + * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + * -> skb->_nfct now references a unconfirmed entry + * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + * interface. + * 3. skb gets passed up the stack. + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + * and schedules a work queue to send them out on the lower devices. + * + * The clone skb->_nfct is not a copy, it is the same entry as the + * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. + * + * The Macvlan broadcast worker and normal confirm path will race. + * + * This race will not happen if step 2 already confirmed a clone. In that + * case later steps perform skb_clone() with skb->_nfct already confirmed (in + * hash table). This works fine. + * + * But such confirmation won't happen when eb/ip/nftables rules dropped the + * packets before they reached the nf_confirm step in postrouting. + * + * Work around this problem by explicit confirmation of the entry at + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed + * entry. + * + */ +static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + ct = container_of(nfct, struct nf_conn, ct_general); + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. + */ + ct_hook = rcu_dereference(nf_ct_hook); + if (!ct_hook) { + skb->_nfct = 0ul; + nf_conntrack_put(nfct); + return NF_ACCEPT; + } + + nf_bridge_pull_encap_header(skb); + ret = ct_hook->confirm(skb); + switch (ret & NF_VERDICT_MASK) { + case NF_STOLEN: + return NF_STOLEN; + default: + nf_bridge_push_encap_header(skb); + break; + } + + ct = container_of(nfct, struct nf_conn, ct_general); + WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); + + return ret; +} +#endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + { + .hook = br_nf_local_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_LAST, + }, +#endif { .hook = br_nf_forward, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index abb090f94ed2..6f877e31709b 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, return nf_conntrack_in(skb, &bridge_state); } +static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ + skb->_nfct = 0; + nf_ct_put(ct); + + return NF_ACCEPT; +} + static void nf_ct_bridge_frag_save(struct sk_buff *skb, struct nf_bridge_frag_data *data) { @@ -386,6 +410,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { + .hook = nf_ct_bridge_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, + { .hook = nf_ct_bridge_post, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, diff --git a/net/core/dev.c b/net/core/dev.c index 73a021973007..0230391c78f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9078,7 +9078,7 @@ static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll { #if IS_ENABLED(CONFIG_DPLL) rtnl_lock(); - dev->dpll_pin = dpll_pin; + rcu_assign_pointer(dev->dpll_pin, dpll_pin); rtnl_unlock(); #endif } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c4f427f3a50..ae86f751efc3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5169,10 +5169,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -5190,11 +5189,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); } @@ -5238,8 +5237,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 80cdc6f6b34c..5d68cb181695 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return false; /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; + total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); @@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame) continue; /* Don't send frame over port where it has been sent before. - * Also fro SAN, this shouldn't be done. + * Also for SAN, this shouldn't be done. */ if (!frame->is_from_san && hsr_register_frame_out(port, frame->node_src, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a4513ffb66cb..1b6981de3f29 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -554,6 +554,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + static const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { @@ -632,13 +646,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); - - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, headroom)) { ip_rt_put(rt); goto tx_dropped; } + + ip_tunnel_adj_headroom(dev, headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; @@ -818,16 +832,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, max_headroom)) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return; } + ip_tunnel_adj_headroom(dev, max_headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5a839c5fb1a5..055230b669cf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5509,9 +5509,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) - return -EINVAL; - + if (!addr) { + err = -EINVAL; + goto errout; + } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d5ea5f5bcf3a..9d33fd2377c8 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local, rcu_read_unlock(); } - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + if (sta->uploaded) + drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) diff --git a/net/mctp/route.c b/net/mctp/route.c index 6218dcd07e18..ceee44ea09d9 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); if (!dev) { rcu_read_unlock(); - return rc; + goto out_free; } rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rt->mtu = 0; } else { - return -EINVAL; + rc = -EINVAL; + goto out_free; } spin_lock_irqsave(&rt->dev->addrs_lock, flags); @@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + /* route output functions consume the skb, even on error */ + skb = NULL; + out_release: if (!ext_rt) mctp_route_release(rt); mctp_dev_put(tmp_rt.dev); +out_free: + kfree_skb(skb); return rc; } diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 6ff6f14674aa..7017dd60659d 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e3e96a49f922..63fc0758c22d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -981,10 +981,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (mp_opt->deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); -set_fully_established: if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); +set_fully_established: mptcp_data_lock((struct sock *)msk); __mptcp_subflow_fully_established(msk, subflow, mp_opt); mptcp_data_unlock((struct sock *)msk); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index d396a5973429..bc97cc30f013 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -495,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); + addr_l.family = AF_INET6; + } + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + addr_r.family = AF_INET6; + } +#endif if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 948606a537da..7833a49f6214 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext = mptcp_get_ext(skb); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; + tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } @@ -3177,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, @@ -3199,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; @@ -3210,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; msk->snd_una = msk->write_seq; - msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; mptcp_init_sched(msk, mptcp_sk(sk)->sched); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 631a7f445f34..07f6242afc1a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -790,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline void mptcp_write_space(struct sock *sk) +{ + if (sk_stream_is_writeable(sk)) { + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) + sk_stream_write_space(sk); + } +} + static inline void __mptcp_sync_sndbuf(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -808,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk) /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */ WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); + mptcp_write_space(sk); } /* The called held both the msk socket and the subflow socket locks, @@ -838,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) local_bh_enable(); } -static inline void mptcp_write_space(struct sock *sk) -{ - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) - sk_stream_write_space(sk); - } -} - void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags); #define MPTCP_TOKEN_MAX_RETRIES 4 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e5f3864d353..5b876fa7f9af 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = { .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, .set_closing = nf_conntrack_set_closing, + .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 1f9474fefe84..d3d11dede545 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -359,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -610,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9c962347cf85..ff315351269f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index de96959336c4..211f57164cb6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -52,6 +52,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ -274,22 +275,30 @@ static int tls_do_decryption(struct sock *sk, DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { + DECLARE_CRYPTO_WAIT(wait); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &ctx->async_wait); + crypto_req_done, &wait); + ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS || ret == -EBUSY) + ret = crypto_wait_req(ret, &wait); + return ret; } ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS) + return 0; + if (ret == -EBUSY) { ret = tls_decrypt_async_wait(ctx); - ret = ret ?: -EINPROGRESS; + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; } - if (ret == -EINPROGRESS) { - if (darg->async) - return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); - } + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -1588,8 +1597,11 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; @@ -1601,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; @@ -1948,6 +1963,7 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); @@ -2095,8 +2111,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2135,8 +2153,8 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek, NULL); + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek, NULL); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b09700400d09..bd54a928bab4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4197,6 +4197,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; + if (otype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; if (netif_running(dev)) return -EBUSY; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 98e1150bee9d..9a3dcaafb5b1 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -784,7 +784,7 @@ static int apparmor_getselfattr(unsigned int attr, struct lsm_ctx __user *lx, int error = -ENOENT; struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; - char *value; + char *value = NULL; switch (attr) { case LSM_ATTR_CURRENT: diff --git a/security/landlock/fs.c b/security/landlock/fs.c index fc520a06f9af..0171f7eb6ee1 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -737,8 +737,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a6bf90ace84c..338b023a8c3e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6559,7 +6559,7 @@ static int selinux_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx, size_t *size, u32 flags) { int rc; - char *val; + char *val = NULL; int val_len; val_len = selinux_lsm_getattr(attr, current, &val); diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 57ee70ae50f2..ea3140d510ec 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2649,13 +2649,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 6e6d474c8366..45e49671ae87 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -523,6 +523,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0; } } + i++; } return 0; diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 0a58ebb8b04c..f300f4e1eb59 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -20,7 +20,7 @@ flush_pids() ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null - for _ in $(seq 10); do + for _ in $(seq $((timeout_poll * 10))); do [ -z "$(ip netns pids "${ns}")" ] && break sleep 0.1 done @@ -91,6 +91,15 @@ chk_msk_nr() __chk_msk_nr "grep -c token:" "$@" } +chk_listener_nr() +{ + local expected=$1 + local msg="$2" + + __chk_nr "ss -inmlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 + __chk_nr "ss -inmlHtON $ns | wc -l" "$expected" "$msg - subflows" +} + wait_msk_nr() { local condition="grep -c token:" @@ -289,5 +298,24 @@ flush_pids chk_msk_inuse 0 "many->0" chk_msk_cestab 0 "many->0" +chk_listener_nr 0 "no listener sockets" +NR_SERVERS=100 +for I in $(seq 1 $NR_SERVERS); do + ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \ + -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 & +done + +for I in $(seq 1 $NR_SERVERS); do + mptcp_lib_wait_local_port_listen $ns $((I + 20001)) +done + +chk_listener_nr $NR_SERVERS "many listener sockets" + +# graceful termination +for I in $(seq 1 $NR_SERVERS); do + echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 & +done +flush_pids + mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c07386e21e0a..e4581b0dfb96 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -161,6 +161,11 @@ check_tools() exit $ksft_skip fi + if ! ss -h | grep -q MPTCP; then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip + fi + # Use the legacy version if available to support old kernel versions if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy" @@ -3333,16 +3338,17 @@ userspace_pm_rm_sf() { local evts=$evts_ns1 local t=${3:-1} - local ip=4 + local ip local tk da dp sp local cnt [ "$1" == "$ns2" ] && evts=$evts_ns2 - if mptcp_lib_is_v6 $2; then ip=6; fi + [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4 + [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6 tk=$(mptcp_lib_evts_get_info token "$evts") - da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) - dp=$(mptcp_lib_evts_get_info dport "$evts" $t) - sp=$(mptcp_lib_evts_get_info sport "$evts" $t) + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2) cnt=$(rm_sf_count ${1}) ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ @@ -3429,20 +3435,23 @@ userspace_tests() if reset_with_events "userspace pm add & remove address" && continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns2 2 2 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 - chk_join_nr 1 1 1 - chk_add_nr 1 1 - chk_mptcp_info subflows 1 subflows 1 - chk_subflows_total 2 2 - chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_add_addr $ns1 10.0.3.1 20 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_mptcp_info subflows 2 subflows 2 + chk_subflows_total 3 3 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED - chk_rm_nr 1 1 invert + userspace_pm_rm_addr $ns1 20 + userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED + chk_rm_nr 2 2 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 3a2abae5993e..3777d66fc56d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -213,9 +213,9 @@ mptcp_lib_get_info_value() { grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' } -# $1: info name ; $2: evts_ns ; $3: event type +# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" + grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } # $1: PID diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 27574bbf2d63..5ae85def0739 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -247,6 +247,20 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp +chk_gro_flag "gro vs xdp while down - gro flag on" $DST on +ip -n $NS_DST link set dev veth$DST down +chk_gro_flag " - after down" $DST on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after xdp off" $DST off +ip -n $NS_DST link set dev veth$DST up +chk_gro_flag " - after up" $DST off +ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp +chk_gro_flag " - after peer xdp" $DST off +cleanup + +create_ns chk_channels "default channels" $DST 1 1 ip -n $NS_DST link set dev veth$DST down diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index db27153eb4a0..936c3085bb83 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -7,7 +7,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ - conntrack_sctp_collision.sh xt_string.sh + conntrack_sctp_collision.sh xt_string.sh \ + bridge_netfilter.sh HOSTPKG_CONFIG := pkg-config diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh new file mode 100644 index 000000000000..659b3ab02c8b --- /dev/null +++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge netfilter + conntrack, a combination that doesn't really work, +# with multicast/broadcast packets racing for hash table insertion. + +# eth0 br0 eth0 +# setup is: ns1 <->,ns0 <-> ns3 +# ns2 <-' `'-> ns4 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" +ns2="ns2-$sfx" +ns3="ns3-$sfx" +ns4="ns4-$sfx" + +ebtables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +for i in $(seq 0 4); do + eval ip netns add \$ns$i +done + +cleanup() { + for i in $(seq 0 4); do eval ip netns del \$ns$i;done +} + +trap cleanup EXIT + +do_ping() +{ + fromns="$1" + dstip="$2" + + ip netns exec $fromns ping -c 1 -q $dstip > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + ret=1 + fi +} + +bcast_ping() +{ + fromns="$1" + dstip="$2" + + for i in $(seq 1 1000); do + ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "ERROR: ping -b from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + fi + done +} + +ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1} +if [ $? -ne 0 ]; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi + +ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2 +ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3 +ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4 + +ip -net ${ns0} link set lo up + +for i in $(seq 1 4); do + ip -net ${ns0} link set veth$i up +done + +ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1 +if [ $? -ne 0 ]; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +# make veth0,1,2 part of bridge. +for i in $(seq 1 3); do + ip -net ${ns0} link set veth$i master br0 +done + +# add a macvlan on top of the bridge. +MACVLAN_ADDR=ba:f3:13:37:42:23 +ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private +ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan0 up +ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0 + +# add a macvlan on top of veth4. +MACVLAN_ADDR=ba:f3:13:37:42:24 +ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa +ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan4 up + +# make the macvlan part of the bridge. +# veth4 is not a bridge port, only the macvlan on top of it. +ip -net ${ns0} link set macvlan4 master br0 + +ip -net ${ns0} link set br0 up +ip -net ${ns0} addr add 10.0.0.1/24 dev br0 +ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1 +ret=$? +if [ $ret -ne 0 ] ; then + echo "SKIP: bridge netfilter not available" + ret=$ksft_skip +fi + +# for testing, so namespaces will reply to ping -b probes. +ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 + +# enable conntrack in ns0 and drop broadcast packets in forward to +# avoid them from getting confirmed in the postrouting hook before +# the cloned skb is passed up the stack. +ip netns exec ${ns0} nft -f - <<EOF +table ip filter { + chain input { + type filter hook input priority 1; policy accept + iifname br0 counter + ct state new accept + } +} + +table bridge filter { + chain forward { + type filter hook forward priority 0; policy accept + meta pkttype broadcast ip protocol icmp counter drop + } +} +EOF + +# place 1, 2 & 3 in same subnet, connected via ns0:br0. +# ns4 is placed in same subnet as well, but its not +# part of the bridge: the corresponding veth4 is not +# part of the bridge, only its macvlan interface. +for i in $(seq 1 4); do + eval ip -net \$ns$i link set lo up + eval ip -net \$ns$i link set eth0 up +done +for i in $(seq 1 2); do + eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0 +done + +ip -net ${ns3} addr add 10.23.0.13/24 dev eth0 +ip -net ${ns4} addr add 10.23.0.14/24 dev eth0 + +# test basic connectivity +do_ping ${ns1} 10.0.0.12 +do_ping ${ns3} 10.23.0.1 +do_ping ${ns4} 10.23.0.1 + +if [ $ret -eq 0 ];then + echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0" +fi + +bcast_ping ${ns1} 10.0.0.255 + +# This should deliver broadcast to macvlan0, which is on top of ns0:br0. +bcast_ping ${ns3} 10.23.0.255 + +# same, this time via veth4:macvlan4. +bcast_ping ${ns4} 10.23.0.255 + +read t < /proc/sys/kernel/tainted + +if [ $t -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + ret=1 +fi + +exit $ret |