aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-class-bluetooth9
-rw-r--r--Documentation/block/ublk.rst2
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ethqos.yaml8
-rw-r--r--Documentation/filesystems/locking.rst7
-rw-r--r--Documentation/filesystems/porting.rst16
-rw-r--r--Documentation/filesystems/vfs.rst24
-rw-r--r--Documentation/networking/can.rst4
-rw-r--r--Documentation/networking/mptcp-sysctl.rst2
-rw-r--r--Documentation/networking/napi.rst2
-rw-r--r--Documentation/power/video.rst2
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/s390/Kconfig9
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/boot/als.c10
-rw-r--r--arch/s390/boot/boot.h26
-rw-r--r--arch/s390/boot/decompressor.c12
-rw-r--r--arch/s390/boot/ipl_parm.c20
-rw-r--r--arch/s390/boot/ipl_report.c3
-rw-r--r--arch/s390/boot/kaslr.c4
-rw-r--r--arch/s390/boot/pgm_check_info.c53
-rw-r--r--arch/s390/boot/physmem_info.c103
-rw-r--r--arch/s390/boot/printk.c224
-rw-r--r--arch/s390/boot/startup.c47
-rw-r--r--arch/s390/boot/vmem.c135
-rw-r--r--arch/s390/include/asm/asm-extable.h14
-rw-r--r--arch/s390/include/asm/asm.h2
-rw-r--r--arch/s390/include/asm/bitops.h2
-rw-r--r--arch/s390/include/asm/boot_data.h51
-rw-r--r--arch/s390/include/asm/fpu-insn.h183
-rw-r--r--arch/s390/include/asm/ftrace.h1
-rw-r--r--arch/s390/include/asm/futex.h107
-rw-r--r--arch/s390/include/asm/page.h4
-rw-r--r--arch/s390/include/asm/physmem_info.h4
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/include/asm/uaccess.h548
-rw-r--r--arch/s390/kernel/early.c3
-rw-r--r--arch/s390/kernel/ftrace.c5
-rw-r--r--arch/s390/kernel/setup.c37
-rw-r--r--arch/s390/kernel/vmlinux.lds.S1
-rw-r--r--arch/s390/lib/uaccess.c90
-rw-r--r--arch/s390/mm/extable.c30
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/s390/pci/pci_bus.c1
-rw-r--r--arch/s390/purgatory/Makefile2
-rw-r--r--arch/s390/tools/gen_opcode_table.c27
-rw-r--r--arch/x86/kernel/acpi/boot.c50
-rw-r--r--drivers/acpi/x86/utils.c13
-rw-r--r--drivers/base/power/main.c29
-rw-r--r--drivers/bluetooth/btnxpuart.c3
-rw-r--r--drivers/bluetooth/btusb.c12
-rw-r--r--drivers/cpufreq/Kconfig.arm2
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c5
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c5
-rw-r--r--drivers/cpufreq/cpufreq.c20
-rw-r--r--drivers/cpufreq/s3c64xx-cpufreq.c11
-rw-r--r--drivers/cpuidle/governors/teo.c197
-rw-r--r--drivers/gpio/gpio-mxc.c3
-rw-r--r--drivers/net/bonding/bond_main.c19
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.h3
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c35
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c3
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c31
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.c15
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_parser.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_parser_rt.c12
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq.c6
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_main.c15
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.c25
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c1
-rw-r--r--drivers/net/ethernet/mediatek/airoha_eth.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c32
-rw-r--r--drivers/net/ethernet/realtek/8139too.c4
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c22
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c57
-rw-r--r--drivers/net/ethernet/sun/niu.c10
-rw-r--r--drivers/net/ethernet/via/via-rhine.c11
-rw-r--r--drivers/net/netdevsim/ethtool.c2
-rw-r--r--drivers/net/netdevsim/netdevsim.h1
-rw-r--r--drivers/net/netdevsim/udp_tunnels.c23
-rw-r--r--drivers/net/phy/marvell-88q2xxx.c33
-rw-r--r--drivers/net/phy/nxp-c45-tja11xx.c2
-rw-r--r--drivers/net/usb/ipheth.c69
-rw-r--r--drivers/net/usb/rtl8150.c22
-rw-r--r--drivers/net/vxlan/vxlan_vnifilter.c5
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/mac.c9
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/pci.c8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/pci.c8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/pci.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mac.c17
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/pci.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/pci.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mac.c12
-rw-r--r--drivers/ptp/ptp_chardev.c4
-rw-r--r--drivers/ptp/ptp_clock.c8
-rw-r--r--drivers/s390/char/sclp.c12
-rw-r--r--drivers/s390/char/vmlogrdr.c8
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/vfs_dentry.c26
-rw-r--r--fs/afs/dir.c40
-rw-r--r--fs/bcachefs/btree_cache.c5
-rw-r--r--fs/bcachefs/btree_iter.c3
-rw-r--r--fs/bcachefs/btree_key_cache.c4
-rw-r--r--fs/bcachefs/btree_trans_commit.c2
-rw-r--r--fs/bcachefs/compress.c31
-rw-r--r--fs/bcachefs/compress.h4
-rw-r--r--fs/bcachefs/data_update.c50
-rw-r--r--fs/bcachefs/debug.c1
-rw-r--r--fs/bcachefs/io_write.c4
-rw-r--r--fs/bcachefs/io_write.h2
-rw-r--r--fs/bcachefs/journal.c92
-rw-r--r--fs/bcachefs/journal.h9
-rw-r--r--fs/bcachefs/journal_io.c2
-rw-r--r--fs/bcachefs/journal_reclaim.c142
-rw-r--r--fs/bcachefs/journal_reclaim.h3
-rw-r--r--fs/bcachefs/journal_types.h13
-rw-r--r--fs/bcachefs/movinggc.c11
-rw-r--r--fs/bcachefs/opts.h4
-rw-r--r--fs/bcachefs/sb-errors_format.h2
-rw-r--r--fs/bcachefs/str_hash.c24
-rw-r--r--fs/bcachefs/trace.h26
-rw-r--r--fs/ceph/dir.c25
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/crypto/fname.c22
-rw-r--r--fs/dcache.c103
-rw-r--r--fs/ecryptfs/dentry.c18
-rw-r--r--fs/exfat/namei.c11
-rw-r--r--fs/ext4/fast_commit.c29
-rw-r--r--fs/ext4/fast_commit.h3
-rw-r--r--fs/fat/namei_vfat.c19
-rw-r--r--fs/fuse/dir.c20
-rw-r--r--fs/gfs2/dentry.c31
-rw-r--r--fs/hfs/sysdep.c3
-rw-r--r--fs/jfs/namei.c3
-rw-r--r--fs/kernfs/dir.c3
-rw-r--r--fs/libfs.c15
-rw-r--r--fs/namei.c18
-rw-r--r--fs/nfs/dir.c62
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs3proc.c5
-rw-r--r--fs/nfs/nfs4proc.c20
-rw-r--r--fs/nfs/proc.c6
-rw-r--r--fs/ntfs3/attrib.c15
-rw-r--r--fs/ntfs3/dir.c2
-rw-r--r--fs/ntfs3/frecord.c74
-rw-r--r--fs/ntfs3/fsntfs.c6
-rw-r--r--fs/ntfs3/index.c6
-rw-r--r--fs/ntfs3/inode.c3
-rw-r--r--fs/ntfs3/ntfs_fs.h21
-rw-r--r--fs/ntfs3/record.c79
-rw-r--r--fs/ocfs2/dcache.c14
-rw-r--r--fs/orangefs/dcache.c22
-rw-r--r--fs/overlayfs/super.c22
-rw-r--r--fs/proc/base.c6
-rw-r--r--fs/proc/fd.c3
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/smb/client/dir.c3
-rw-r--r--fs/tracefs/inode.c3
-rw-r--r--fs/vboxsf/dir.c3
-rw-r--r--include/linux/dcache.h23
-rw-r--r--include/linux/fscrypt.h7
-rw-r--r--include/linux/netdevice.h4
-rw-r--r--include/linux/nfs_xdr.h2
-rw-r--r--include/linux/pm.h5
-rw-r--r--include/net/page_pool/types.h1
-rw-r--r--include/net/xfrm.h16
-rw-r--r--kernel/power/hibernate.c7
-rw-r--r--kernel/sched/cpufreq_schedutil.c6
-rw-r--r--kernel/sched/syscalls.c7
-rw-r--r--net/bluetooth/l2cap_sock.c4
-rw-r--r--net/core/dev.c39
-rw-r--r--net/core/page_pool.c2
-rw-r--r--net/core/page_pool_priv.h2
-rw-r--r--net/core/page_pool_user.c15
-rw-r--r--net/ethtool/ioctl.c2
-rw-r--r--net/hsr/hsr_forward.c7
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/ipmr_base.c3
-rw-r--r--net/ipv4/tcp_output.c9
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/xfrm6_output.c4
-rw-r--r--net/mptcp/ctrl.c4
-rw-r--r--net/mptcp/options.c13
-rw-r--r--net/mptcp/pm_netlink.c3
-rw-r--r--net/mptcp/protocol.c4
-rw-r--r--net/mptcp/protocol.h30
-rw-r--r--net/ncsi/ncsi-manage.c13
-rw-r--r--net/ncsi/ncsi-rsp.c18
-rw-r--r--net/netfilter/nf_tables_api.c8
-rw-r--r--net/nfc/nci/hci.c2
-rw-r--r--net/rose/rose_timer.c15
-rw-r--r--net/rxrpc/peer_event.c16
-rw-r--r--net/rxrpc/peer_object.c12
-rw-r--r--net/sched/sch_ets.c2
-rw-r--r--net/vmw_vsock/af_vsock.c13
-rw-r--r--net/xfrm/xfrm_interface_core.c2
-rw-r--r--net/xfrm/xfrm_output.c7
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_replay.c10
-rw-r--r--net/xfrm/xfrm_state.c93
-rw-r--r--tools/net/ynl/lib/ynl.c2
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma.c2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh16
-rwxr-xr-xtools/testing/selftests/gpio/gpio-sim.sh31
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py14
-rw-r--r--tools/testing/selftests/net/lib/Makefile2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rw-r--r--tools/testing/selftests/net/openvswitch/Makefile2
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh4
-rw-r--r--tools/testing/vsock/util.c88
-rw-r--r--tools/testing/vsock/util.h2
-rw-r--r--tools/testing/vsock/vsock_test.c122
228 files changed, 2980 insertions, 1801 deletions
diff --git a/Documentation/ABI/stable/sysfs-class-bluetooth b/Documentation/ABI/stable/sysfs-class-bluetooth
new file mode 100644
index 000000000000..36be02471174
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-bluetooth
@@ -0,0 +1,9 @@
+What: /sys/class/bluetooth/hci<index>/reset
+Date: 14-Jan-2025
+KernelVersion: 6.13
+Contact: linux-bluetooth@vger.kernel.org
+Description: This write-only attribute allows users to trigger the vendor reset
+ method on the Bluetooth device when arbitrary data is written.
+ The reset may or may not be done through the device transport
+ (e.g., UART/USB), and can also be done through an out-of-band
+ approach such as GPIO.
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst
index 51665a3e6a50..1e0e7358e14a 100644
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@@ -333,6 +333,4 @@ References
.. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README
-.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
-
.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
index f117471fb06f..e7ee0d9efed8 100644
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -22,12 +22,12 @@ properties:
oneOf:
- items:
- enum:
- - qcom,qcs8300-ethqos
- - const: qcom,sa8775p-ethqos
+ - qcom,qcs615-ethqos
+ - const: qcom,qcs404-ethqos
- items:
- enum:
- - qcom,qcs615-ethqos
- - const: qcom,sm8150-ethqos
+ - qcom,qcs8300-ethqos
+ - const: qcom,sa8775p-ethqos
- enum:
- qcom,qcs404-ethqos
- qcom,sa8775p-ethqos
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index f5e3676db954..d20a32b77b60 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -17,7 +17,8 @@ dentry_operations
prototypes::
- int (*d_revalidate)(struct dentry *, unsigned int);
+ int (*d_revalidate)(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
int (*d_hash)(const struct dentry *, struct qstr *);
int (*d_compare)(const struct dentry *,
@@ -30,6 +31,8 @@ prototypes::
struct vfsmount *(*d_automount)(struct path *path);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
+ bool (*d_unalias_trylock)(const struct dentry *);
+ void (*d_unalias_unlock)(const struct dentry *);
locking rules:
@@ -49,6 +52,8 @@ d_dname: no no no no
d_automount: no no yes no
d_manage: no no yes (ref-walk) maybe
d_real no no yes no
+d_unalias_trylock yes no no no
+d_unalias_unlock yes no no no
================== =========== ======== ============== ========
inode_operations
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index c1c121055204..1639e78e3146 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1141,3 +1141,19 @@ pointer are gone.
set_blocksize() takes opened struct file instead of struct block_device now
and it *must* be opened exclusive.
+
+---
+
+** mandatory**
+
+->d_revalidate() gets two extra arguments - inode of parent directory and
+name our dentry is expected to have. Both are stable (dir is pinned in
+non-RCU case and will stay around during the call in RCU case, and name
+is guaranteed to stay unchanging). Your instance doesn't have to use
+either, but it often helps to avoid a lot of painful boilerplate.
+Note that while name->name is stable and NUL-terminated, it may (and
+often will) have name->name[name->len] equal to '/' rather than '\0' -
+in normal case it points into the pathname being looked up.
+NOTE: if you need something like full path from the root of filesystem,
+you are still on your own - this assists with simple cases, but it's not
+magic.
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 0b18af3f954e..31eea688609a 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -1251,7 +1251,8 @@ defined:
.. code-block:: c
struct dentry_operations {
- int (*d_revalidate)(struct dentry *, unsigned int);
+ int (*d_revalidate)(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
int (*d_hash)(const struct dentry *, struct qstr *);
int (*d_compare)(const struct dentry *,
@@ -1264,6 +1265,8 @@ defined:
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
+ bool (*d_unalias_trylock)(const struct dentry *);
+ void (*d_unalias_unlock)(const struct dentry *);
};
``d_revalidate``
@@ -1427,6 +1430,25 @@ defined:
For non-regular files, the 'dentry' argument is returned.
+``d_unalias_trylock``
+ if present, will be called by d_splice_alias() before moving a
+ preexisting attached alias. Returning false prevents __d_move(),
+ making d_splice_alias() fail with -ESTALE.
+
+ Rationale: setting FS_RENAME_DOES_D_MOVE will prevent d_move()
+ and d_exchange() calls from the outside of filesystem methods;
+ however, it does not guarantee that attached dentries won't
+ be renamed or moved by d_splice_alias() finding a preexisting
+ alias for a directory inode. Normally we would not care;
+ however, something that wants to stabilize the entire path to
+ root over a blocking operation might need that. See 9p for one
+ (and hopefully only) example.
+
+``d_unalias_unlock``
+ should be paired with ``d_unalias_trylock``; that one is called after
+ __d_move() call in __d_unalias().
+
+
Each dentry has a pointer to its parent dentry, as well as a hash list
of child dentries. Child dentries are basically like files in a
directory.
diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index 62519d38c58b..b018ce346392 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -699,10 +699,10 @@ RAW socket option CAN_RAW_JOIN_FILTERS
The CAN_RAW socket can set multiple CAN identifier specific filters that
lead to multiple filters in the af_can.c filter processing. These filters
-are indenpendent from each other which leads to logical OR'ed filters when
+are independent from each other which leads to logical OR'ed filters when
applied (see :ref:`socketcan-rawfilter`).
-This socket option joines the given CAN filters in the way that only CAN
+This socket option joins the given CAN filters in the way that only CAN
frames are passed to user space that matched *all* given CAN filters. The
semantic for the applied filters is therefore changed to a logical AND.
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index dc45c0211353..03e1d3610333 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -41,7 +41,7 @@ blackhole_timeout - INTEGER (seconds)
MPTCP is re-enabled and will reset to the initial value when the
blackhole issue goes away.
- 0 to disable the blackhole detection.
+ 0 to disable the blackhole detection. This is a per-namespace sysctl.
Default: 3600
diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst
index 6083210ab2a4..f970a2be271a 100644
--- a/Documentation/networking/napi.rst
+++ b/Documentation/networking/napi.rst
@@ -362,7 +362,7 @@ It is expected that ``irq-suspend-timeout`` will be set to a value much larger
than ``gro_flush_timeout`` as ``irq-suspend-timeout`` should suspend IRQs for
the duration of one userland processing cycle.
-While it is not stricly necessary to use ``napi_defer_hard_irqs`` and
+While it is not strictly necessary to use ``napi_defer_hard_irqs`` and
``gro_flush_timeout`` to use IRQ suspension, their use is strongly
recommended.
diff --git a/Documentation/power/video.rst b/Documentation/power/video.rst
index 337a2ba9f32f..8ab2458d1304 100644
--- a/Documentation/power/video.rst
+++ b/Documentation/power/video.rst
@@ -190,7 +190,7 @@ Toshiba Portege 3020CT s3_mode (3)
Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK)
Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK)
Toshiba Satellite 4090XCDT ??? [#f1]_
-Toshiba Satellite P10-554 s3_bios,s3_mode (4)[#f3]_
+Toshiba Satellite P10-554 s3_bios,s3_mode (4) [#f3]_
Toshiba M30 (2) xor X with nvidia driver using internal AGP
Uniwill 244IIO ??? [#f1]_
=============================== ===============================================
diff --git a/MAINTAINERS b/MAINTAINERS
index bc8ce7af3303..d1086e53a317 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4102,6 +4102,7 @@ S: Supported
W: http://www.bluez.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
+F: Documentation/ABI/stable/sysfs-class-bluetooth
F: include/net/bluetooth/
F: net/bluetooth/
@@ -16277,6 +16278,7 @@ F: drivers/scsi/sun3_scsi_vme.c
NCSI LIBRARY
M: Samuel Mendoza-Jonas <sam@mendozajonas.com>
+R: Paul Fertser <fercerpav@gmail.com>
S: Maintained
F: net/ncsi/
@@ -16611,6 +16613,7 @@ F: tools/testing/selftests/net/mptcp/
NETWORKING [TCP]
M: Eric Dumazet <edumazet@google.com>
+M: Neal Cardwell <ncardwell@google.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/net_cachelines/tcp_sock.rst
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6e9545d8b0c7..9c9ec08d78c7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -52,13 +52,19 @@ config KASAN_SHADOW_OFFSET
depends on KASAN
default 0x1C000000000000
-config GCC_ASM_FLAG_OUTPUT_BROKEN
+config CC_ASM_FLAG_OUTPUT_BROKEN
def_bool CC_IS_GCC && GCC_VERSION < 140200
help
GCC versions before 14.2.0 may die with an internal
compiler error in some configurations if flag output
operands are used within inline assemblies.
+config CC_HAS_ASM_AOR_FORMAT_FLAGS
+ def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100)
+ help
+ Clang versions before 19.1.0 do not support A,
+ O, and R inline assembly format flags.
+
config S390
def_bool y
#
@@ -72,6 +78,7 @@ config S390
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 3f25498dac65..5fae311203c2 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
endif
-KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 11e0c3d5dbc8..79afb5fa7f1f 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -46,7 +46,7 @@ void print_missing_facilities(void)
* z/VM adds a four character prefix.
*/
if (strlen(als_str) > 70) {
- boot_printk("%s\n", als_str);
+ boot_emerg("%s\n", als_str);
*als_str = '\0';
}
u16_to_decimal(val_str, i * BITS_PER_LONG + j);
@@ -54,7 +54,7 @@ void print_missing_facilities(void)
first = 0;
}
}
- boot_printk("%s\n", als_str);
+ boot_emerg("%s\n", als_str);
}
static void facility_mismatch(void)
@@ -62,10 +62,10 @@ static void facility_mismatch(void)
struct cpuid id;
get_cpu_id(&id);
- boot_printk("The Linux kernel requires more recent processor hardware\n");
- boot_printk("Detected machine-type number: %4x\n", id.machine);
+ boot_emerg("The Linux kernel requires more recent processor hardware\n");
+ boot_emerg("Detected machine-type number: %4x\n", id.machine);
print_missing_facilities();
- boot_printk("See Principles of Operations for facility bits\n");
+ boot_emerg("See Principles of Operations for facility bits\n");
disabled_wait();
}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 56244fe78182..69f261566a64 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,6 +8,7 @@
#ifndef __ASSEMBLY__
+#include <linux/printk.h>
#include <asm/physmem_info.h>
struct machine_info {
@@ -47,13 +48,16 @@ void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align);
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align);
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom);
unsigned long get_physmem_alloc_pos(void);
+void dump_physmem_reserved(void);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
unsigned long *intersection_start);
bool is_ipl_block_dump(void);
@@ -69,12 +73,28 @@ void print_pgm_check_info(void);
unsigned long randomize_within_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max);
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
-void __printf(1, 2) boot_printk(const char *fmt, ...);
+int __printf(1, 2) boot_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);
int get_random(unsigned long limit, unsigned long *value);
+void boot_rb_dump(void);
+
+#ifndef boot_fmt
+#define boot_fmt(fmt) fmt
+#endif
+
+#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__)
extern struct machine_info machine;
+extern int boot_console_loglevel;
+extern bool boot_ignore_loglevel;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index f478e8e9cbda..03500b9d9fb9 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
+#include <asm/boot_data.h>
#include <asm/page.h>
#include "decompressor.h"
#include "boot.h"
@@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c"
#endif
+static void decompress_error(char *m)
+{
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Decompression error: %s\n", m);
+ boot_emerg(" -- System halted\n");
+ disabled_wait();
+}
+
unsigned long mem_safe_offset(void)
{
return ALIGN(free_mem_end_ptr, PAGE_SIZE);
@@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void)
void deploy_kernel(void *output)
{
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, vmlinux.image_size, NULL, error);
+ NULL, NULL, output, vmlinux.image_size, NULL, decompress_error);
}
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 557462e62cd7..d3731f2983b7 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -215,7 +215,7 @@ static void check_cleared_facilities(void)
for (i = 0; i < ARRAY_SIZE(als); i++) {
if ((stfle_fac_list[i] & als[i]) != als[i]) {
- boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+ boot_emerg("The Linux kernel requires facilities cleared via command line option\n");
print_missing_facilities();
break;
}
@@ -313,5 +313,23 @@ void parse_boot_command_line(void)
#endif
if (!strcmp(param, "relocate_lowcore") && test_facility(193))
relocate_lowcore = 1;
+ if (!strcmp(param, "earlyprintk"))
+ boot_earlyprintk = true;
+ if (!strcmp(param, "debug"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ if (!strcmp(param, "bootdebug")) {
+ bootdebug = true;
+ if (val)
+ strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1);
+ }
+ if (!strcmp(param, "quiet"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ if (!strcmp(param, "ignore_loglevel"))
+ boot_ignore_loglevel = true;
+ if (!strcmp(param, "loglevel")) {
+ boot_console_loglevel = simple_strtoull(val, NULL, 10);
+ if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN)
+ boot_console_loglevel = CONSOLE_LOGLEVEL_MIN;
+ }
}
}
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index d00898852a88..f73cd757a5f7 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void)
{
struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp;
- size_t size;
/*
* Find the length for the IPL report boot data
@@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void)
return;
size = get_cert_comp_list_size();
- early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+ early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int));
ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
copy_components_bootdata();
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index f864d2bff775..941f4c9e27cc 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -32,7 +32,7 @@ struct prng_parm {
static int check_prng(void)
{
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
- boot_printk("KASLR disabled: CPU has no PRNG\n");
+ boot_warn("KASLR disabled: CPU has no PRNG\n");
return 0;
}
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a
* cannot have chains.
*
* On the other hand, "dynamic" or "repetitive" allocations are done via
- * physmem_alloc_top_down(). These allocations are tightly packed together
+ * physmem_alloc_or_die(). These allocations are tightly packed together
* top down from the end of online memory. physmem_alloc_pos represents
* current position where those allocations start.
*
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 5abe59fb3bc0..633f11600aab 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -17,13 +17,14 @@ void print_stacktrace(unsigned long sp)
(unsigned long)_stack_end };
bool first = true;
- boot_printk("Call Trace:\n");
+ boot_emerg("Call Trace:\n");
while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
struct stack_frame *sf = (struct stack_frame *)sp;
- boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
- " sp:%016lx [<%016lx>] %pS\n",
- sp, sf->gprs[8], (void *)sf->gprs[8]);
+ if (first)
+ boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ else
+ boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
if (sf->back_chain <= sp)
break;
sp = sf->back_chain;
@@ -36,30 +37,30 @@ void print_pgm_check_info(void)
unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area;
struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area);
- boot_printk("Linux version %s\n", kernel_version);
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- boot_printk("Kernel command line: %s\n", early_command_line);
- boot_printk("Kernel fault: interruption code %04x ilc:%x\n",
- get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1);
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Kernel fault: interruption code %04x ilc:%d\n",
+ get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1);
if (kaslr_enabled()) {
- boot_printk("Kernel random base: %lx\n", __kaslr_offset);
- boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+ boot_emerg("Kernel random base: %lx\n", __kaslr_offset);
+ boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys);
}
- boot_printk("PSW : %016lx %016lx (%pS)\n",
- get_lowcore()->psw_save_area.mask,
- get_lowcore()->psw_save_area.addr,
- (void *)get_lowcore()->psw_save_area.addr);
- boot_printk(
- " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
- psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
- psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
- psw->eaba);
- boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+ boot_emerg("PSW : %016lx %016lx (%pS)\n",
+ get_lowcore()->psw_save_area.mask,
+ get_lowcore()->psw_save_area.addr,
+ (void *)get_lowcore()->psw_save_area.addr);
+ boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+ psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+ psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba);
+ boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
print_stacktrace(get_lowcore()->gpregs_save_area[15]);
- boot_printk("Last Breaking-Event-Address:\n");
- boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break,
- (void *)get_lowcore()->pgm_last_break);
+ boot_emerg("Last Breaking-Event-Address:\n");
+ boot_emerg(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break,
+ (void *)get_lowcore()->pgm_last_break);
}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index 7617aa2d2f7e..aa096ef68e8c 100644
--- a/arch/s390/boot/physmem_info.c
+++ b/arch/s390/boot/physmem_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "physmem: " fmt
#include <linux/processor.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
return &physmem_info.online[n];
if (unlikely(!physmem_info.online_extended)) {
physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
- RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+ RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
physmem_alloc_pos, true);
}
return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
@@ -207,11 +208,16 @@ unsigned long detect_max_physmem_end(void)
max_physmem_end = search_mem_end();
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
}
+ boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end,
+ get_physmem_info_source());
return max_physmem_end;
}
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
+ unsigned long start, end;
+ int i;
+
if (!sclp_early_read_storage_info()) {
physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
} else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) {
@@ -226,12 +232,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end)
} else if (max_physmem_end) {
add_physmem_online_range(0, max_physmem_end);
}
+ boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source());
+ for_each_physmem_online_range(i, &start, &end)
+ boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end);
}
void physmem_set_usable_limit(unsigned long limit)
{
physmem_info.usable = limit;
physmem_alloc_pos = limit;
+ boot_debug("Usable memory limit: 0x%016lx\n", limit);
}
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
@@ -241,38 +251,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min,
enum reserved_range_type t;
int i;
- boot_printk("Linux version %s\n", kernel_version);
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- boot_printk("Kernel command line: %s\n", early_command_line);
- boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
- size, align, min, max);
- boot_printk("Reserved memory ranges:\n");
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n",
+ size, align, min, max);
+ boot_emerg("Reserved memory ranges:\n");
for_each_physmem_reserved_range(t, range, &start, &end) {
- boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+ boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
total_reserved_mem += end - start;
}
- boot_printk("Usable online memory ranges (info source: %s [%x]):\n",
- get_physmem_info_source(), physmem_info.info_source);
+ boot_emerg("Usable online memory ranges (info source: %s [%d]):\n",
+ get_physmem_info_source(), physmem_info.info_source);
for_each_physmem_usable_range(i, &start, &end) {
- boot_printk("%016lx %016lx\n", start, end);
+ boot_emerg("%016lx %016lx\n", start, end);
total_mem += end - start;
}
- boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
- total_mem, total_reserved_mem,
- total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+ boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n",
+ total_mem, total_reserved_mem,
+ total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
print_stacktrace(current_frame_address());
- boot_printk("\n\n -- System halted\n");
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
-void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
physmem_info.reserved[type].start = addr;
physmem_info.reserved[type].end = addr + size;
}
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size,
+ get_rr_type_name(type));
+}
+
void physmem_free(enum reserved_range_type type)
{
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start,
+ physmem_info.reserved[type].end, get_rr_type_name(type));
physmem_info.reserved[type].start = 0;
physmem_info.reserved[type].end = 0;
}
@@ -339,41 +358,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s
max = min(max, physmem_alloc_pos);
addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
if (addr)
- physmem_reserve(type, addr, size);
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size,
+ get_rr_type_name(type));
return addr;
}
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align)
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom)
{
struct reserved_range *range = &physmem_info.reserved[type];
- struct reserved_range *new_range;
+ struct reserved_range *new_range = NULL;
unsigned int ranges_left;
unsigned long addr;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
- &ranges_left, true);
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
/* if not a consecutive allocation of the same type or first allocation */
if (range->start != addr + size) {
if (range->end) {
- physmem_alloc_pos = __physmem_alloc_range(
- sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
- physmem_alloc_ranges, &ranges_left, true);
- new_range = (struct reserved_range *)physmem_alloc_pos;
+ addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0,
+ physmem_alloc_pos, physmem_alloc_ranges,
+ &ranges_left, true);
+ new_range = (struct reserved_range *)addr;
+ addr = __physmem_alloc_range(size, align, 0, addr, ranges_left,
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
*new_range = *range;
range->chain = new_range;
- addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
- ranges_left, &ranges_left, true);
}
range->end = addr + size;
}
+ if (type != RR_VMEM) {
+ boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:",
+ addr, addr + size, get_rr_type_name(type), align, !!new_range);
+ }
range->start = addr;
physmem_alloc_pos = addr;
physmem_alloc_ranges = ranges_left;
return addr;
}
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align)
+{
+ return physmem_alloc(type, size, align, true);
+}
+
unsigned long get_physmem_alloc_pos(void)
{
return physmem_alloc_pos;
}
+
+void dump_physmem_reserved(void)
+{
+ struct reserved_range *range;
+ enum reserved_range_type t;
+ unsigned long start, end;
+
+ boot_debug("Reserved memory ranges:\n");
+ for_each_physmem_reserved_range(t, range, &start, &end) {
+ if (end) {
+ boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n",
+ get_rr_type_name(t), start, end, (unsigned long)range,
+ (unsigned long)range->chain);
+ }
+ }
+}
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
index 35f18f2b936e..b4c66fa667d5 100644
--- a/arch/s390/boot/printk.c
+++ b/arch/s390/boot/printk.c
@@ -5,21 +5,111 @@
#include <linux/ctype.h>
#include <asm/stacktrace.h>
#include <asm/boot_data.h>
+#include <asm/sections.h>
#include <asm/lowcore.h>
#include <asm/setup.h>
#include <asm/sclp.h>
#include <asm/uv.h>
#include "boot.h"
+int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT;
+bool boot_ignore_loglevel;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+static void boot_rb_add(const char *str, size_t len)
+{
+ /* leave double '\0' in the end */
+ size_t avail = sizeof(boot_rb) - boot_rb_off - 1;
+
+ /* store strings separated by '\0' */
+ if (len + 1 > avail)
+ boot_rb_off = 0;
+ strcpy(boot_rb + boot_rb_off, str);
+ boot_rb_off += len + 1;
+}
+
+static void print_rb_entry(const char *str)
+{
+ sclp_early_printk(printk_skip_level(str));
+}
+
+static bool debug_messages_printed(void)
+{
+ return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG);
+}
+
+void boot_rb_dump(void)
+{
+ if (debug_messages_printed())
+ return;
+ sclp_early_printk("Boot messages ring buffer:\n");
+ boot_rb_foreach(print_rb_entry);
+}
+
const char hex_asc[] = "0123456789abcdef";
static char *as_hex(char *dst, unsigned long val, int pad)
{
- char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+ char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
- for (*p-- = 0; p >= dst; val >>= 4)
+ for (*p-- = '\0'; p >= dst; val >>= 4)
*p-- = hex_asc[val & 0x0f];
- return end;
+ return dst;
+}
+
+#define MAX_NUMLEN 21
+static char *as_dec(char *buf, unsigned long val, bool is_signed)
+{
+ bool negative = false;
+ char *p = buf + MAX_NUMLEN;
+
+ if (is_signed && (long)val < 0) {
+ val = (val == LONG_MIN ? LONG_MIN : -(long)val);
+ negative = true;
+ }
+
+ *--p = '\0';
+ do {
+ *--p = '0' + (val % 10);
+ val /= 10;
+ } while (val);
+
+ if (negative)
+ *--p = '-';
+ return p;
+}
+
+static ssize_t strpad(char *dst, size_t dst_size, const char *src,
+ int _pad, bool zero_pad, bool decimal)
+{
+ ssize_t len = strlen(src), pad = _pad;
+ char *p = dst;
+
+ if (max(len, abs(pad)) >= dst_size)
+ return -E2BIG;
+
+ if (pad > len) {
+ if (decimal && zero_pad && *src == '-') {
+ *p++ = '-';
+ src++;
+ len--;
+ pad--;
+ }
+ memset(p, zero_pad ? '0' : ' ', pad - len);
+ p += pad - len;
+ }
+ memcpy(p, src, len);
+ p += len;
+ if (pad < 0 && -pad > len) {
+ memset(p, ' ', -pad - len);
+ p += -pad - len;
+ }
+ *p = '\0';
+ return p - dst;
}
static char *symstart(char *p)
@@ -58,35 +148,94 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh
return NULL;
}
-static noinline char *strsym(void *ip)
+#define MAX_SYMLEN 64
+static noinline char *strsym(char *buf, void *ip)
{
- static char buf[64];
unsigned short off;
unsigned short len;
char *p;
p = findsym((unsigned long)ip, &off, &len);
if (p) {
- strncpy(buf, p, sizeof(buf));
+ strncpy(buf, p, MAX_SYMLEN);
/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
- p = buf + strnlen(buf, sizeof(buf) - 15);
+ p = buf + strnlen(buf, MAX_SYMLEN - 15);
strcpy(p, "+0x");
- p = as_hex(p + 3, off, 0);
- strcpy(p, "/0x");
- as_hex(p + 3, len, 0);
+ as_hex(p + 3, off, 0);
+ strcat(p, "/0x");
+ as_hex(p + strlen(p), len, 0);
} else {
as_hex(buf, (unsigned long)ip, 16);
}
return buf;
}
-void boot_printk(const char *fmt, ...)
+static inline int printk_loglevel(const char *buf)
+{
+ if (buf[0] == KERN_SOH_ASCII && buf[1]) {
+ switch (buf[1]) {
+ case '0' ... '7':
+ return buf[1] - '0';
+ }
+ }
+ return MESSAGE_LOGLEVEL_DEFAULT;
+}
+
+static void boot_console_earlyprintk(const char *buf)
+{
+ int level = printk_loglevel(buf);
+
+ /* always print emergency messages */
+ if (level > LOGLEVEL_EMERG && !boot_earlyprintk)
+ return;
+ buf = printk_skip_level(buf);
+ /* print debug messages only when bootdebug is enabled */
+ if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf))))
+ return;
+ if (boot_ignore_loglevel || level < boot_console_loglevel)
+ sclp_early_printk(buf);
+}
+
+static char *add_timestamp(char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ union tod_clock *boot_clock = (union tod_clock *)&get_lowcore()->boot_clock;
+ unsigned long ns = tod_to_ns(get_tod_clock() - boot_clock->tod);
+ char ts[MAX_NUMLEN];
+
+ *buf++ = '[';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0);
+ *buf++ = '.';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0);
+ *buf++ = ']';
+ *buf++ = ' ';
+#endif
+ return buf;
+}
+
+#define va_arg_len_type(args, lenmod, typemod) \
+ ((lenmod == 'l') ? va_arg(args, typemod long) : \
+ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \
+ (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \
+ (lenmod == 'z') ? va_arg(args, typemod long) : \
+ va_arg(args, typemod int))
+
+int boot_printk(const char *fmt, ...)
{
char buf[1024] = { 0 };
char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
- unsigned long pad;
- char *p = buf;
+ bool zero_pad, decimal;
+ char *strval, *p = buf;
+ char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)];
va_list args;
+ char lenmod;
+ ssize_t len;
+ int pad;
+
+ *p++ = KERN_SOH_ASCII;
+ *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT;
+ p = add_timestamp(p);
+ fmt = printk_skip_level(fmt);
va_start(args, fmt);
for (; p < end && *fmt; fmt++) {
@@ -94,31 +243,56 @@ void boot_printk(const char *fmt, ...)
*p++ = *fmt;
continue;
}
- pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
+ if (*++fmt == '%') {
+ *p++ = '%';
+ continue;
+ }
+ zero_pad = (*fmt == '0');
+ pad = simple_strtol(fmt, (char **)&fmt, 10);
+ lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0;
+ if (lenmod == 'h' && *fmt == 'h') {
+ lenmod = 'H';
+ fmt++;
+ }
+ decimal = false;
switch (*fmt) {
case 's':
- p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
+ if (lenmod)
+ goto out;
+ strval = va_arg(args, char *);
+ zero_pad = false;
break;
case 'p':
- if (*++fmt != 'S')
+ if (*++fmt != 'S' || lenmod)
goto out;
- p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
+ strval = strsym(valbuf, va_arg(args, void *));
+ zero_pad = false;
break;
- case 'l':
- if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned long), pad);
+ case 'd':
+ case 'i':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1);
+ decimal = true;
+ break;
+ case 'u':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
break;
case 'x':
- if (end - p <= max(sizeof(int) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned int), pad);
+ strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
break;
default:
goto out;
}
+ len = strpad(p, end - p, strval, pad, zero_pad, decimal);
+ if (len == -E2BIG)
+ break;
+ p += len;
}
out:
va_end(args);
- sclp_early_printk(buf);
+ len = strlen(buf);
+ if (len) {
+ boot_rb_add(buf, len);
+ boot_console_earlyprintk(buf);
+ }
+ return len;
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index e6b06692ddc8..885bd1dd2c82 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "startup: " fmt
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/page-states.h>
@@ -42,7 +43,8 @@ struct machine_info machine;
void error(char *x)
{
- boot_printk("\n\n%s\n\n -- System halted", x);
+ boot_emerg("%s\n", x);
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
@@ -143,7 +145,7 @@ static void rescue_initrd(unsigned long min, unsigned long max)
return;
old_addr = addr;
physmem_free(RR_INITRD);
- addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+ addr = physmem_alloc_or_die(RR_INITRD, size, 0);
memmove((void *)addr, (void *)old_addr, size);
}
@@ -222,12 +224,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
if (oldmem_data.start) {
__kaslr_enabled = 0;
ident_map_size = min(ident_map_size, oldmem_data.size);
+ boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
__kaslr_enabled = 0;
- if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+ if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) {
ident_map_size = min(ident_map_size, hsa_size);
+ boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size);
+ }
}
#endif
+ boot_debug("Identity map size: 0x%016lx\n", ident_map_size);
}
#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
@@ -267,6 +273,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+ boot_debug("vmem size estimated: 0x%016lx\n", vsize);
if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
(vsize > _REGION2_SIZE && kaslr_enabled())) {
asce_limit = _REGION1_SIZE;
@@ -290,8 +297,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
* otherwise asce_limit and rte_size would have been adjusted.
*/
vmax = adjust_to_uv_max(asce_limit);
+ boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax);
#ifdef CONFIG_KASAN
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
+ boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END);
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
@@ -305,19 +314,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
pos = 0;
kernel_end = vmax - pos * THREAD_SIZE;
kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+ boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start,
+ kernel_size + kernel_size);
} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
- boot_printk("The kernel base address is forced to %lx\n", kernel_start);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start,
+ kernel_start + kernel_size);
} else {
kernel_start = __NO_KASLR_START_KERNEL;
+ boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start,
+ kernel_start + kernel_size);
}
__kaslr_offset = kernel_start;
+ boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset);
MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
if (IS_ENABLED(CONFIG_KMSAN))
VMALLOC_END -= MODULES_LEN * 2;
+ boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END);
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
@@ -329,10 +346,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
VMALLOC_END -= vmalloc_size * 2;
}
VMALLOC_START = VMALLOC_END - vmalloc_size;
+ boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END);
__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+ boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area,
+ __memcpy_real_area + MEMCPY_REAL_SIZE);
__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
sizeof(struct lowcore));
+ boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore,
+ __abs_lowcore + ABS_LOWCORE_MAP_SIZE);
/* split remaining virtual space between 1:1 mapping & vmemmap array */
pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
@@ -352,8 +374,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
- if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE))
- __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#endif
+ boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base,
+ __identity_base + ident_map_size);
return asce_limit;
}
@@ -412,6 +437,10 @@ void startup_kernel(void)
psw_t psw;
setup_lpp();
+ store_ipl_parmblock();
+ uv_query_info();
+ setup_boot_command_line();
+ parse_boot_command_line();
/*
* Non-randomized kernel physical start address must be _SEGMENT_SIZE
@@ -431,12 +460,8 @@ void startup_kernel(void)
oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size;
- store_ipl_parmblock();
read_ipl_report();
- uv_query_info();
sclp_early_read_info();
- setup_boot_command_line();
- parse_boot_command_line();
detect_facilities();
cmma_init();
sanitize_prot_virt_host();
@@ -526,6 +551,7 @@ void startup_kernel(void)
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
+ dump_physmem_reserved();
copy_bootdata();
__apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
(struct alt_instr *)_vmlinux_info.alt_instructions_end,
@@ -542,5 +568,6 @@ void startup_kernel(void)
*/
psw.addr = __kaslr_offset + vmlinux.entry;
psw.mask = PSW_KERNEL_BITS;
+ boot_debug("Starting kernel at: 0x%016lx\n", psw.addr);
__load_psw(psw);
}
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 881a1ece422f..cfca94a8eac4 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "vmem: " fmt
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
@@ -13,6 +14,7 @@
#include "decompressor.h"
#include "boot.h"
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
struct ctlreg __bootdata_preserved(s390_invalid_asce);
#ifdef CONFIG_PROC_FS
@@ -31,12 +33,42 @@ enum populate_mode {
POPULATE_IDENTITY,
POPULATE_KERNEL,
#ifdef CONFIG_KASAN
+ /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
POPULATE_KASAN_MAP_SHADOW,
POPULATE_KASAN_ZERO_SHADOW,
POPULATE_KASAN_SHALLOW
#endif
};
+#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
+static inline const char *get_populate_mode_name(enum populate_mode t)
+{
+ switch (t) {
+ POPULATE_MODE_NAME(NONE);
+ POPULATE_MODE_NAME(DIRECT);
+ POPULATE_MODE_NAME(LOWCORE);
+ POPULATE_MODE_NAME(ABS_LOWCORE);
+ POPULATE_MODE_NAME(IDENTITY);
+ POPULATE_MODE_NAME(KERNEL);
+#ifdef CONFIG_KASAN
+ POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
+ POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
+ POPULATE_MODE_NAME(KASAN_SHALLOW);
+#endif
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static bool is_kasan_populate_mode(enum populate_mode mode)
+{
+#ifdef CONFIG_KASAN
+ return mode >= POPULATE_KASAN_MAP_SHADOW;
+#else
+ return false;
+#endif
+}
+
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
#ifdef CONFIG_KASAN
@@ -52,9 +84,12 @@ static pte_t pte_z;
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- pgtable_populate(start, end, mode);
+ unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
+ unsigned long sha_end = PAGE_ALIGN(__sha(end));
+
+ boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
+ start, end, sha_start, sha_end);
+ pgtable_populate(sha_start, sha_end, mode);
}
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
@@ -200,7 +235,7 @@ static void *boot_crst_alloc(unsigned long val)
unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table;
- table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+ table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
crst_table_init(table, val);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
@@ -216,7 +251,7 @@ static pte_t *boot_pte_alloc(void)
* during POPULATE_KASAN_MAP_SHADOW when EDAT is off
*/
if (!pte_leftover) {
- pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+ pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
__arch_set_page_dat(pte, 1);
} else {
@@ -228,11 +263,12 @@ static pte_t *boot_pte_alloc(void)
return pte;
}
-static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
+static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
+ enum populate_mode mode)
{
switch (mode) {
case POPULATE_NONE:
- return -1;
+ return INVALID_PHYS_ADDR;
case POPULATE_DIRECT:
return addr;
case POPULATE_LOWCORE:
@@ -245,38 +281,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return __identity_pa(addr);
#ifdef CONFIG_KASAN
case POPULATE_KASAN_MAP_SHADOW:
- addr = physmem_alloc_top_down(RR_VMEM, size, size);
- memset((void *)addr, 0, size);
- return addr;
+ /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */
+ addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
+ if (addr) {
+ memset((void *)addr, 0, size);
+ return addr;
+ }
+ return INVALID_PHYS_ADDR;
#endif
default:
- return -1;
+ return INVALID_PHYS_ADDR;
}
}
-static bool large_allowed(enum populate_mode mode)
+static bool large_page_mapping_allowed(enum populate_mode mode)
{
- return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
+ switch (mode) {
+ case POPULATE_DIRECT:
+ case POPULATE_IDENTITY:
+ case POPULATE_KERNEL:
+#ifdef CONFIG_KASAN
+ case POPULATE_KASAN_MAP_SHADOW:
+#endif
+ return true;
+ default:
+ return false;
+ }
}
-static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
+
+ if (!machine.has_edat2 || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PUD_SIZE))
+ return INVALID_PHYS_ADDR;
- return machine.has_edat2 && large_allowed(mode) &&
- IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
+ return pa;
}
-static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
- return machine.has_edat1 && large_allowed(mode) &&
- IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
+ if (!machine.has_edat1 || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ return pa;
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -290,7 +352,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (pte_none(*pte)) {
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
- entry = __pte(_pa(addr, PAGE_SIZE, mode));
+ entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL);
set_pte(pte, entry);
pages++;
@@ -303,7 +365,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pmd_t *pmd, entry;
pte_t *pte;
@@ -313,8 +375,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (pmd_none(*pmd)) {
if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
continue;
- if (can_large_pmd(pmd, addr, next, mode)) {
- entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
+ pa = try_get_large_pmd_pa(pmd, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pmd(pa);
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
set_pmd(pmd, entry);
pages++;
@@ -334,7 +397,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pud_t *pud, entry;
pmd_t *pmd;
@@ -344,8 +407,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (pud_none(*pud)) {
if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
continue;
- if (can_large_pud(pud, addr, next, mode)) {
- entry = __pud(_pa(addr, _REGION3_SIZE, mode));
+ pa = try_get_large_pud_pa(pud, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pud(pa);
entry = set_pud_bit(entry, REGION3_KERNEL);
set_pud(pud, entry);
pages++;
@@ -388,6 +452,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
pgd_t *pgd;
p4d_t *p4d;
+ if (!is_kasan_populate_mode(mode)) {
+ boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
+ get_populate_mode_name(mode), addr, end,
+ resolve_pa_may_alloc(addr, 0, mode),
+ resolve_pa_may_alloc(end - 1, 0, mode) + 1);
+ }
+
pgd = pgd_offset(&init_mm, addr);
for (; addr < end; addr = next, pgd++) {
next = pgd_addr_end(addr, end);
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index 4a6b0a8b6412..2e829c16fd8a 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -9,11 +9,11 @@
#define EX_TYPE_NONE 0
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
-#define EX_TYPE_UA_STORE 3
-#define EX_TYPE_UA_LOAD_MEM 4
+#define EX_TYPE_UA_FAULT 3
#define EX_TYPE_UA_LOAD_REG 5
#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_TYPE_ZEROPAD 7
+#define EX_TYPE_FPC 8
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -69,11 +69,8 @@
#define EX_TABLE_AMODE31(_fault, _target) \
__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
-#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
-
-#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0)
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
@@ -84,4 +81,7 @@
#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0)
+#define EX_TABLE_FPC(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h
index ec011b94af2a..e9062b01e2a2 100644
--- a/arch/s390/include/asm/asm.h
+++ b/arch/s390/include/asm/asm.h
@@ -28,7 +28,7 @@
* [var] also contains the program mask. CC_TRANSFORM() moves the condition
* code to the two least significant bits and sets all other bits to zero.
*/
-#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN))
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN))
#define __HAVE_ASM_FLAG_OUTPUTS__
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 15aa64e3020e..d5125296ade2 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -60,7 +60,7 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig
asm volatile(
" tm %[addr],%[mask]\n"
: "=@cc" (cc)
- : [addr] "R" (*addr), [mask] "I" (mask)
+ : [addr] "Q" (*addr), [mask] "I" (mask)
);
return cc == 3;
}
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index f7eed27b3220..f55f8227058e 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_BOOT_DATA_H
+#include <linux/string.h>
#include <asm/setup.h>
#include <asm/ipl.h>
@@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size;
extern unsigned long early_ipl_comp_list_addr;
extern unsigned long early_ipl_comp_list_size;
+extern char boot_rb[PAGE_SIZE * 2];
+extern bool boot_earlyprintk;
+extern size_t boot_rb_off;
+extern char bootdebug_filter[128];
+extern bool bootdebug;
+
+#define boot_rb_foreach(cb) \
+ do { \
+ size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \
+ size_t len; \
+ for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ } while (0)
+
+/*
+ * bootdebug_filter is a comma separated list of strings,
+ * where each string can be a prefix of the message.
+ */
+static inline bool bootdebug_filter_match(const char *buf)
+{
+ char *p = bootdebug_filter, *s;
+ char *end;
+
+ if (!*p)
+ return true;
+
+ end = p + strlen(p);
+ while (p < end) {
+ p = skip_spaces(p);
+ s = memscan(p, ',', end - p);
+ if (!strncmp(p, buf, s - p))
+ return true;
+ p = s + 1;
+ }
+ return false;
+}
+
+static inline const char *skip_timestamp(const char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ const char *p = memchr(buf, ']', strlen(buf));
+
+ if (p && p[1] == ' ')
+ return p + 2;
+#endif
+ return buf;
+}
+
#endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index de510c9f6efa..f668bffd6dd3 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc)
*/
static inline void fpu_lfpc_safe(unsigned int *fpc)
{
- u32 tmp;
-
instrument_read(fpc, sizeof(*fpc));
asm_inline volatile(
- "0: lfpc %[fpc]\n"
- "1: nopr %%r7\n"
- ".pushsection .fixup, \"ax\"\n"
- "2: lghi %[tmp],0\n"
- " sfpc %[tmp]\n"
- " jg 1b\n"
- ".popsection\n"
- EX_TABLE(1b, 2b)
- : [tmp] "=d" (tmp)
+ " lfpc %[fpc]\n"
+ "0: nopr %%r7\n"
+ EX_TABLE_FPC(0b, 0b)
+ :
: [fpc] "Q" (*fpc)
: "memory");
}
@@ -183,7 +176,19 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+ instrument_read(vxr, sizeof(__vector128));
+ asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
@@ -197,19 +202,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr)
: "memory", "1");
}
-#else /* CONFIG_CC_IS_CLANG */
-
-static __always_inline void fpu_vl(u8 v1, const void *vxr)
-{
- instrument_read(vxr, sizeof(__vector128));
- asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
- :
- : [vxr] "Q" (*(__vector128 *)vxr),
- [v1] "I" (v1)
- : "memory");
-}
-
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
{
@@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
return val;
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile(
- " la 1,%[vxr]\n"
- " VLL %[v1],%[index],0,1\n"
- :
- : [vxr] "R" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
- :
- : [vxr] "Q" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VLL %[v1],%[index],0,1\n"
+ :
+ : [vxr] "R" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile( \
- " la 1,%[vxrs]\n" \
- " VLM %[v1],%[v3],0,1\n" \
- : \
- : [vxrs] "R" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : \
+ : [vxrs] "Q" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : \
- : [vxrs] "Q" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VLM %[v1],%[v3],0,1\n" \
+ : \
+ : [vxrs] "R" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vlr(u8 v1, u8 v2)
{
@@ -362,7 +355,18 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+ instrument_write(vxr, sizeof(__vector128));
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
+ : [vxr] "=Q" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
@@ -375,20 +379,23 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr)
: "memory", "1");
}
-#else /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-static __always_inline void fpu_vst(u8 v1, const void *vxr)
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
- instrument_write(vxr, sizeof(__vector128));
- asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
- : [vxr] "=Q" (*(__vector128 *)vxr)
- : [v1] "I" (v1)
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_write(vxr, size);
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ : [vxr] "=Q" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
: "memory");
}
-#endif /* CONFIG_CC_IS_CLANG */
-
-#ifdef CONFIG_CC_IS_CLANG
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
@@ -404,23 +411,9 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
: "memory", "1");
}
-#else /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
-{
- unsigned int size;
-
- size = min(index + 1, sizeof(__vector128));
- instrument_write(vxr, size);
- asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
- : [vxr] "=Q" (*(u8 *)vxr)
- : [index] "d" (index), [v1] "I" (v1)
- : "memory");
-}
-
-#endif /* CONFIG_CC_IS_CLANG */
-
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -430,16 +423,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile( \
- " la 1,%[vxrs]\n" \
- " VSTM %[v1],%[v3],0,1\n" \
- : [vxrs] "=R" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : [vxrs] "=Q" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : [vxrs] "=Q" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VSTM %[v1],%[v3],0,1\n" \
+ : [vxrs] "=R" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vupllf(u8 v1, u8 v2)
{
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index a3b73a4f626e..185331e91f83 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -51,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
+#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip))
#include <linux/ftrace_regs.h>
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 752a2310f0d6..f5781794356b 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -2,80 +2,95 @@
#ifndef _ASM_S390_FUTEX_H
#define _ASM_S390_FUTEX_H
+#include <linux/instrumented.h>
#include <linux/uaccess.h>
#include <linux/futex.h>
#include <asm/asm-extable.h>
#include <asm/mmu_context.h>
#include <asm/errno.h>
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile( \
- " sacf 256\n" \
- "0: l %1,0(%6)\n" \
- "1:"insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
- EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc");
+#define FUTEX_OP_FUNC(name, insn) \
+static uaccess_kmsan_or_inline int \
+__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
+{ \
+ int rc, new; \
+ \
+ instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
+ asm_inline volatile( \
+ " sacf 256\n" \
+ "0: l %[old],%[uaddr]\n" \
+ "1:"insn \
+ "2: cs %[old],%[new],%[uaddr]\n" \
+ "3: jl 1b\n" \
+ " lhi %[rc],0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \
+ : [rc] "=d" (rc), [old] "=&d" (*old), \
+ [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
+ : [oparg] "d" (oparg) \
+ : "cc"); \
+ if (!rc) \
+ instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
+ return rc; \
+}
+
+FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n")
+FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n")
+FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n")
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
- u32 __user *uaddr)
+static inline
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int oldval = 0, newval, ret;
+ int old, rc;
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_set(oparg, &old, uaddr);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_add(oparg, &old, uaddr);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_or(oparg, &old, uaddr);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, ~oparg);
+ rc = __futex_atomic_and(~oparg, &old, uaddr);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_xor(oparg, &old, uaddr);
break;
default:
- ret = -ENOSYS;
+ rc = -ENOSYS;
}
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ if (!rc)
+ *oval = old;
+ return rc;
}
-static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
+static uaccess_kmsan_or_inline
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
{
- int ret;
+ int rc;
- asm volatile(
- " sacf 256\n"
- "0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2: sacf 768\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+ asm_inline volatile(
+ " sacf 256\n"
+ "0: cs %[old],%[new],%[uaddr]\n"
+ "1: lhi %[rc],0\n"
+ "2: sacf 768\n"
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc])
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc])
+ : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
+ : [new] "d" (newval)
: "cc", "memory");
*uval = oldval;
- return ret;
+ instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
+ return rc;
}
#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 4f43cdd9835b..1ff145f7b52b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -184,7 +184,11 @@ extern struct vm_layout vm_layout;
#define __kaslr_offset vm_layout.kaslr_offset
#define __kaslr_offset_phys vm_layout.kaslr_offset_phys
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
#define __identity_base vm_layout.identity_base
+#else
+#define __identity_base 0UL
+#endif
#define ident_map_size vm_layout.identity_size
static inline unsigned long kaslr_offset(void)
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 51b68a43e195..7ef3bbec98b0 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -26,7 +26,7 @@ enum reserved_range_type {
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
- RR_MEM_DETECT_EXTENDED,
+ RR_MEM_DETECT_EXT,
RR_VMEM,
RR_MAX
};
@@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
RR_TYPE_NAME(AMODE31);
RR_TYPE_NAME(IPLREPORT);
RR_TYPE_NAME(CERT_COMP_LIST);
- RR_TYPE_NAME(MEM_DETECT_EXTENDED);
+ RR_TYPE_NAME(MEM_DETECT_EXT);
RR_TYPE_NAME(VMEM);
default:
return "UNKNOWN";
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 4da3b2956285..18f37dff03c9 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -172,6 +172,7 @@ void sclp_early_printk(const char *s);
void __sclp_early_printk(const char *s, unsigned int len);
void sclp_emergency_printk(const char *s);
+int sclp_init(void);
int sclp_early_get_memsize(unsigned long *mem);
int sclp_early_get_hsa_size(unsigned long *hsa_size);
int _sclp_get_core_info(struct sclp_core_info *info);
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index a81f897a81ce..f5920163ee97 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -22,16 +22,117 @@
void debug_user_asce(int exit);
-unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+union oac {
+ unsigned int val;
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac1;
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac2;
+ };
+};
-unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+static __always_inline __must_check unsigned long
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
+{
+ unsigned long rem;
+ union oac spec = {
+ .oac2.key = key,
+ .oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.k = 1,
+ .oac2.a = 1,
+ };
-#ifndef CONFIG_KASAN
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-#endif
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
+ "1: jz 5f\n"
+ " algr %[size],%[val]\n"
+ " slgr %[from],%[val]\n"
+ " slgr %[to],%[val]\n"
+ " j 0b\n"
+ "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */
+ " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */
+ " slgr %[rem],%[from]\n"
+ " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
+ "4: slgr %[size],%[rem]\n"
+ " j 6f\n"
+ "5: lghi %[size],0\n"
+ "6:\n"
+ EX_TABLE(0b, 2b)
+ EX_TABLE(1b, 2b)
+ EX_TABLE(3b, 6b)
+ EX_TABLE(4b, 6b)
+ : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
+ : [val] "a" (-4096UL), [spec] "d" (spec.val)
+ : "cc", "memory", "0");
+ return size;
+}
+
+static __always_inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return raw_copy_from_user_key(to, from, n, 0);
+}
+
+static __always_inline __must_check unsigned long
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
+{
+ unsigned long rem;
+ union oac spec = {
+ .oac1.key = key,
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.k = 1,
+ .oac1.a = 1,
+ };
+
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
+ "1: jz 5f\n"
+ " algr %[size],%[val]\n"
+ " slgr %[to],%[val]\n"
+ " slgr %[from],%[val]\n"
+ " j 0b\n"
+ "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
+ " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
+ " slgr %[rem],%[to]\n"
+ " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
+ "4: slgr %[size],%[rem]\n"
+ " j 6f\n"
+ "5: lghi %[size],0\n"
+ "6:\n"
+ EX_TABLE(0b, 2b)
+ EX_TABLE(1b, 2b)
+ EX_TABLE(3b, 6b)
+ EX_TABLE(4b, 6b)
+ : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
+ : [val] "a" (-4096UL), [spec] "d" (spec.val)
+ : "cc", "memory", "0");
+ return size;
+}
+
+static __always_inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return raw_copy_to_user_key(to, from, n, 0);
+}
unsigned long __must_check
_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
@@ -55,63 +156,71 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo
return n;
}
-union oac {
- unsigned int val;
- struct {
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac1;
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac2;
- };
-};
-
int __noreturn __put_user_bad(void);
#ifdef CONFIG_KMSAN
-#define get_put_user_noinstr_attributes \
- noinline __maybe_unused __no_sanitize_memory
+#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory
#else
-#define get_put_user_noinstr_attributes __always_inline
+#define uaccess_kmsan_or_inline __always_inline
#endif
-#define DEFINE_PUT_USER(type) \
-static get_put_user_noinstr_attributes int \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__put_user_##type##_noinstr(unsigned type __user *to, \
+ unsigned type *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
__put_user_##type##_noinstr(unsigned type __user *to, \
unsigned type *from, \
unsigned long size) \
{ \
- union oac __oac_spec = { \
- .oac1.as = PSW_BITS_AS_SECONDARY, \
- .oac1.a = 1, \
- }; \
int rc; \
\
asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
return rc; \
-} \
- \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_PUT_USER_NOINSTR(char);
+DEFINE_PUT_USER_NOINSTR(short);
+DEFINE_PUT_USER_NOINSTR(int);
+DEFINE_PUT_USER_NOINSTR(long);
+
+#define DEFINE_PUT_USER(type) \
static __always_inline int \
__put_user_##type(unsigned type __user *to, unsigned type *from, \
unsigned long size) \
@@ -128,69 +237,111 @@ DEFINE_PUT_USER(short);
DEFINE_PUT_USER(int);
DEFINE_PUT_USER(long);
-static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
-{
- int rc;
+#define __put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __x = (x); \
+ int __prc; \
+ \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __prc = __put_user_char((unsigned char __user *)(ptr), \
+ (unsigned char *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 2: \
+ __prc = __put_user_short((unsigned short __user *)(ptr),\
+ (unsigned short *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 4: \
+ __prc = __put_user_int((unsigned int __user *)(ptr), \
+ (unsigned int *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 8: \
+ __prc = __put_user_long((unsigned long __user *)(ptr), \
+ (unsigned long *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ default: \
+ __prc = __put_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__prc, 0); \
+})
- switch (size) {
- case 1:
- rc = __put_user_char((unsigned char __user *)ptr,
- (unsigned char *)x,
- size);
- break;
- case 2:
- rc = __put_user_short((unsigned short __user *)ptr,
- (unsigned short *)x,
- size);
- break;
- case 4:
- rc = __put_user_int((unsigned int __user *)ptr,
- (unsigned int *)x,
- size);
- break;
- case 8:
- rc = __put_user_long((unsigned long __user *)ptr,
- (unsigned long *)x,
- size);
- break;
- default:
- __put_user_bad();
- break;
- }
- return rc;
-}
+#define put_user(x, ptr) \
+({ \
+ might_fault(); \
+ __put_user(x, ptr); \
+})
int __noreturn __get_user_bad(void);
-#define DEFINE_GET_USER(type) \
-static get_put_user_noinstr_attributes int \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
__get_user_##type##_noinstr(unsigned type *to, \
- unsigned type __user *from, \
+ const unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ *to = 0; \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__get_user_##type##_noinstr(unsigned type *to, \
+ const unsigned type __user *from, \
unsigned long size) \
{ \
- union oac __oac_spec = { \
- .oac2.as = PSW_BITS_AS_SECONDARY, \
- .oac2.a = 1, \
- }; \
int rc; \
\
asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \
- EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \
- : [rc] "=&d" (rc), "=Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val), [_to] "a" (to), \
- [_ksize] "K" (size) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
+ if (likely(!rc)) \
+ return 0; \
+ *to = 0; \
return rc; \
-} \
- \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_GET_USER_NOINSTR(char);
+DEFINE_GET_USER_NOINSTR(short);
+DEFINE_GET_USER_NOINSTR(int);
+DEFINE_GET_USER_NOINSTR(long);
+
+#define DEFINE_GET_USER(type) \
static __always_inline int \
-__get_user_##type(unsigned type *to, unsigned type __user *from, \
+__get_user_##type(unsigned type *to, const unsigned type __user *from, \
unsigned long size) \
{ \
int rc; \
@@ -205,107 +356,50 @@ DEFINE_GET_USER(short);
DEFINE_GET_USER(int);
DEFINE_GET_USER(long);
-static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
-{
- int rc;
-
- switch (size) {
- case 1:
- rc = __get_user_char((unsigned char *)x,
- (unsigned char __user *)ptr,
- size);
- break;
- case 2:
- rc = __get_user_short((unsigned short *)x,
- (unsigned short __user *)ptr,
- size);
- break;
- case 4:
- rc = __get_user_int((unsigned int *)x,
- (unsigned int __user *)ptr,
- size);
- break;
- case 8:
- rc = __get_user_long((unsigned long *)x,
- (unsigned long __user *)ptr,
- size);
- break;
- default:
- __get_user_bad();
- break;
- }
- return rc;
-}
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- */
-#define __put_user(x, ptr) \
-({ \
- __typeof__(*(ptr)) __x = (x); \
- int __pu_err = -EFAULT; \
- \
- __chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- case 2: \
- case 4: \
- case 8: \
- __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \
- break; \
- default: \
- __put_user_bad(); \
- break; \
- } \
- __builtin_expect(__pu_err, 0); \
-})
-
-#define put_user(x, ptr) \
-({ \
- might_fault(); \
- __put_user(x, ptr); \
-})
-
#define __get_user(x, ptr) \
({ \
- int __gu_err = -EFAULT; \
+ const __user void *____guptr = (ptr); \
+ int __grc; \
\
__chk_user_ptr(ptr); \
switch (sizeof(*(ptr))) { \
case 1: { \
+ const unsigned char __user *__guptr = ____guptr; \
unsigned char __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 2: { \
+ const unsigned short __user *__guptr = ____guptr; \
unsigned short __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 4: { \
+ const unsigned int __user *__guptr = ____guptr; \
unsigned int __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 8: { \
+ const unsigned long __user *__guptr = ____guptr; \
unsigned long __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
default: \
- __get_user_bad(); \
+ __grc = __get_user_bad(); \
break; \
} \
- __builtin_expect(__gu_err, 0); \
+ __builtin_expect(__grc, 0); \
})
#define get_user(x, ptr) \
@@ -341,109 +435,71 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
return __s390_kernel_write(dst, src, size);
}
-int __noreturn __put_kernel_bad(void);
+void __noreturn __mvc_kernel_nofault_bad(void);
-#define __put_kernel_asm(val, to, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_to]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \
- : [_val] "d" (val) \
- : "cc"); \
- __rc; \
-})
+#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS)
-#define __put_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- unsigned long __x = (unsigned long)(*((type *)(src))); \
- int __pk_err; \
- \
switch (sizeof(type)) { \
case 1: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
- break; \
case 2: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
- break; \
case 4: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \
- break; \
case 8: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+ asm goto( \
+ "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[err_label]) \
+ EX_TABLE(1b, %l[err_label]) \
+ : [_dst] "=Q" (*(type *)dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_len] "I" (sizeof(type)) \
+ : \
+ : err_label); \
break; \
default: \
- __pk_err = __put_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__pk_err)) \
- goto err_label; \
} while (0)
-int __noreturn __get_kernel_bad(void);
-
-#define __get_kernel_asm(val, from, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_from]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \
- EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \
- : [rc] "=d" (__rc), [_val] "=d" (val) \
- : [_from] "Q" (*(from)) \
- : "cc"); \
- __rc; \
-})
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#define __get_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- int __gk_err; \
+ type *(__dst) = (type *)(dst); \
+ int __rc; \
\
switch (sizeof(type)) { \
- case 1: { \
- unsigned char __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 2: { \
- unsigned short __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 4: { \
- unsigned int __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 8: { \
- unsigned long __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
- *((type *)(dst)) = (type)__x; \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ asm_inline volatile( \
+ "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \
+ "1: lhi %[_rc],0\n" \
+ "2:\n" \
+ EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \
+ : [_rc] "=d" (__rc), \
+ "=m" (*__dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_dst] "a" (__dst), \
+ [_len] "I" (sizeof(type))); \
+ if (__rc) \
+ goto err_label; \
break; \
- }; \
default: \
- __gk_err = __get_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__gk_err)) \
- goto err_label; \
} while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#define __get_kernel_nofault __mvc_kernel_nofault
+#define __put_kernel_nofault __mvc_kernel_nofault
+
void __cmpxchg_user_key_called_with_bad_pointer(void);
#define CMPXCHG_USER_KEY_MAX_LOOPS 128
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 62f8f5a750a3..2fa25164df7d 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -50,6 +50,7 @@ decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -58,7 +59,7 @@ static void __init kasan_early_init(void)
{
#ifdef CONFIG_KASAN
init_task.kasan_depth = 0;
- sclp_early_printk("KernelAddressSanitizer initialized\n");
+ pr_info("KernelAddressSanitizer initialized\n");
#endif
}
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c0b2c97efefb..63ba6306632e 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -266,18 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
- int bit;
if (unlikely(ftrace_graph_is_dead()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
- bit = ftrace_test_recursion_trylock(ip, *parent);
- if (bit < 0)
- return;
if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
*parent = (unsigned long)&return_to_handler;
- ftrace_test_recursion_unlock(bit);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1298f0860733..d78bcfe707b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -157,6 +157,12 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
@@ -686,7 +692,7 @@ static void __init reserve_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_reserve(addr, size);
}
@@ -694,7 +700,7 @@ static void __init free_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_phys_free(addr, size);
}
@@ -724,7 +730,7 @@ static void __init reserve_lowcore(void)
void *lowcore_end = lowcore_start + sizeof(struct lowcore);
void *start, *end;
- if ((void *)__identity_base < lowcore_end) {
+ if (absolute_pointer(__identity_base) < lowcore_end) {
start = max(lowcore_start, (void *)__identity_base);
end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
memblock_reserve(__pa(start), __pa(end));
@@ -866,6 +872,23 @@ static void __init log_component_list(void)
}
/*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -884,6 +907,9 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
if (have_relocated_lowcore())
pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
@@ -987,3 +1013,8 @@ void __init setup_arch(char **cmdline_p)
/* Add system specific data to the random pool */
setup_randomness();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
+}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 377b9aaf8c92..ff1ddba96352 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,7 +52,6 @@ SECTIONS
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
- *(.fixup)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index c7c269d5c491..f977b7c37efc 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -31,51 +31,6 @@ void debug_user_asce(int exit)
}
#endif /*CONFIG_DEBUG_ENTRY */
-static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
- unsigned long size, unsigned long key)
-{
- unsigned long rem;
- union oac spec = {
- .oac2.key = key,
- .oac2.as = PSW_BITS_AS_SECONDARY,
- .oac2.k = 1,
- .oac2.a = 1,
- };
-
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[from],%[val]\n"
- " slgr %[to],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */
- " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */
- " slgr %[rem],%[from]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return raw_copy_from_user_key(to, from, n, 0);
-}
-EXPORT_SYMBOL(raw_copy_from_user);
-
unsigned long _copy_from_user_key(void *to, const void __user *from,
unsigned long n, unsigned long key)
{
@@ -93,51 +48,6 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
}
EXPORT_SYMBOL(_copy_from_user_key);
-static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
- unsigned long size, unsigned long key)
-{
- unsigned long rem;
- union oac spec = {
- .oac1.key = key,
- .oac1.as = PSW_BITS_AS_SECONDARY,
- .oac1.k = 1,
- .oac1.a = 1,
- };
-
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[to],%[val]\n"
- " slgr %[from],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
- " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
- " slgr %[rem],%[to]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return raw_copy_to_user_key(to, from, n, 0);
-}
-EXPORT_SYMBOL(raw_copy_to_user);
-
unsigned long _copy_to_user_key(void __user *to, const void *from,
unsigned long n, unsigned long key)
{
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 0a0738a473af..a046be1715cf 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -7,6 +7,7 @@
#include <linux/panic.h>
#include <asm/asm-extable.h>
#include <asm/extable.h>
+#include <asm/fpu.h>
const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
@@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r
return true;
}
-static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs)
{
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
@@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p
return true;
}
-static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
-{
- unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
- unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
- size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
-
- regs->gprs[reg_err] = -EFAULT;
- memset((void *)regs->gprs[reg_addr], 0, len);
- regs->psw.addr = extable_fixup(ex);
- return true;
-}
-
static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
bool pair, struct pt_regs *regs)
{
@@ -77,6 +66,13 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt
return true;
}
+static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ fpu_sfpc(0);
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
@@ -89,16 +85,16 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_fixup(ex, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(ex, regs);
- case EX_TYPE_UA_STORE:
- return ex_handler_ua_store(ex, regs);
- case EX_TYPE_UA_LOAD_MEM:
- return ex_handler_ua_load_mem(ex, regs);
+ case EX_TYPE_UA_FAULT:
+ return ex_handler_ua_fault(ex, regs);
case EX_TYPE_UA_LOAD_REG:
return ex_handler_ua_load_reg(ex, false, regs);
case EX_TYPE_UA_LOAD_REGPAIR:
return ex_handler_ua_load_reg(ex, true, regs);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(ex, regs);
+ case EX_TYPE_FPC:
+ return ex_handler_fpc(ex, regs);
}
panic("invalid exception table entry");
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 7c684c54e721..8ead999e340b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -662,7 +662,7 @@ void __init vmem_map_init(void)
if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
if (debug_pagealloc_enabled())
- __set_memory_4k(__va(0), __va(0) + ident_map_size);
+ __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index d5ace00d10f0..857afbc4828f 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void)
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
{
return !s390_pci_no_rid && zdev->rid_available &&
- zpci_is_device_configured(zdev) &&
!zdev->vfn;
}
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 24eccaa29337..bdcf2a3b6c41 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -13,7 +13,7 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index a1bc02b29c81..7d76c417f83f 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b)
return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
}
+static void print_insn_name(const char *name)
+{
+ size_t i, len;
+
+ len = strlen(name);
+ printf("{");
+ for (i = 0; i < len; i++)
+ printf(" \'%c\',", name[i]);
+ printf(" }");
+}
+
static void print_long_insn(struct gen_opcode *desc)
{
struct insn *insn;
@@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc)
insn = &desc->insn[i];
if (insn->name_len < 6)
continue;
- printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+ printf("\t[LONG_INSN_%s] = ", insn->upper);
+ print_insn_name(insn->name);
+ printf(", \\\n");
}
printf("}\n\n");
}
@@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr)
if (insn->type->byte != 0)
opcode += 2;
printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
- if (insn->name_len < 6)
- printf(".name = \"%s\" ", insn->name);
- else
- printf(".offset = LONG_INSN_%s ", insn->upper);
- printf("}, \\\n");
+ if (insn->name_len < 6) {
+ printf(".name = ");
+ print_insn_name(insn->name);
+ } else {
+ printf(".offset = LONG_INSN_%s", insn->upper);
+ }
+ printf(" }, \\\n");
}
static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7c15d6e83c37..dae6a73be40e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -227,6 +227,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
}
static int __init
+acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end)
+{
+ struct acpi_madt_local_apic *processor = NULL;
+
+ processor = (struct acpi_madt_local_apic *)header;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ /* Ignore invalid ID */
+ if (processor->id == 0xff)
+ return 0;
+
+ /* Ignore processors that can not be onlined */
+ if (!acpi_is_processor_usable(processor->lapic_flags))
+ return 0;
+
+ has_lapic_cpus = true;
+ return 0;
+}
+
+static int __init
acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic *processor = NULL;
@@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
- has_lapic_cpus = true;
return 0;
}
@@ -1026,6 +1047,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
static int __init acpi_parse_madt_lapic_entries(void)
{
int count, x2count = 0;
+ struct acpi_subtable_proc madt_proc[2];
+ int ret;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
@@ -1034,10 +1057,27 @@ static int __init acpi_parse_madt_lapic_entries(void)
acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) {
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_LOCAL_APIC);
- x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_LOCAL_APIC);
+ /* Check if there are valid LAPIC entries */
+ acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC);
+
+ /*
+ * Enumerate the APIC IDs in the order that they appear in the
+ * MADT, no matter LAPIC entry or x2APIC entry is used.
+ */
+ memset(madt_proc, 0, sizeof(madt_proc));
+ madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+ madt_proc[0].handler = acpi_parse_lapic;
+ madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+ madt_proc[1].handler = acpi_parse_x2apic;
+ ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
+ sizeof(struct acpi_table_madt),
+ madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+ if (ret < 0) {
+ pr_err("Error parsing LAPIC/X2APIC entries\n");
+ return ret;
+ }
+ count = madt_proc[0].count;
+ x2count = madt_proc[1].count;
}
if (!count && !x2count) {
pr_err("No LAPIC entries present\n");
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index cb45ef5240da..068c1612660b 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -408,6 +408,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
},
{
+ /* Vexia Edu Atla 10 tablet 5V version */
+ .matches = {
+ /* Having all 3 of these not set is somewhat unique */
+ DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."),
+ DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."),
+ /* Above strings are too generic, also match on BIOS date */
+ DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"),
+ },
+ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ },
+ {
/* Vexia Edu Atla 10 tablet 9V version */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index cbc9a7a75def..d497d448e4b2 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -656,13 +656,15 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy
* so change its status accordingly.
*
* Otherwise, the device is going to be resumed, so set its PM-runtime
- * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set
- * to avoid confusing drivers that don't use it.
+ * status to "active" unless its power.set_active flag is clear, in
+ * which case it is not necessary to update its PM-runtime status.
*/
- if (skip_resume)
+ if (skip_resume) {
pm_runtime_set_suspended(dev);
- else if (dev_pm_skip_suspend(dev))
+ } else if (dev->power.set_active) {
pm_runtime_set_active(dev);
+ dev->power.set_active = false;
+ }
if (dev->pm_domain) {
info = "noirq power domain ";
@@ -1189,18 +1191,24 @@ static pm_message_t resume_event(pm_message_t sleep_state)
return PMSG_ON;
}
-static void dpm_superior_set_must_resume(struct device *dev)
+static void dpm_superior_set_must_resume(struct device *dev, bool set_active)
{
struct device_link *link;
int idx;
- if (dev->parent)
+ if (dev->parent) {
dev->parent->power.must_resume = true;
+ if (set_active)
+ dev->parent->power.set_active = true;
+ }
idx = device_links_read_lock();
- list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
+ list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
link->supplier->power.must_resume = true;
+ if (set_active)
+ link->supplier->power.set_active = true;
+ }
device_links_read_unlock(idx);
}
@@ -1278,8 +1286,11 @@ Skip:
dev->power.may_skip_resume))
dev->power.must_resume = true;
- if (dev->power.must_resume)
- dpm_superior_set_must_resume(dev);
+ if (dev->power.must_resume) {
+ dev->power.set_active = dev->power.set_active ||
+ dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
+ dpm_superior_set_must_resume(dev, dev->power.set_active);
+ }
Complete:
complete_all(&dev->power.completion);
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 1230045d78a5..aa5ec1d444a9 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -1381,13 +1381,12 @@ static void btnxpuart_tx_work(struct work_struct *work)
while ((skb = nxp_dequeue(nxpdev))) {
len = serdev_device_write_buf(serdev, skb->data, skb->len);
- serdev_device_wait_until_sent(serdev, 0);
hdev->stat.byte_tx += len;
skb_pull(skb, len);
if (skb->len > 0) {
skb_queue_head(&nxpdev->txq, skb);
- break;
+ continue;
}
switch (hci_skb_pkt_type(skb)) {
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 9aa018d4f6f5..90966dfbd278 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -899,11 +899,6 @@ static void btusb_reset(struct hci_dev *hdev)
struct btusb_data *data;
int err;
- if (hdev->reset) {
- hdev->reset(hdev);
- return;
- }
-
data = hci_get_drvdata(hdev);
/* This is not an unbalanced PM reference since the device will reset */
err = usb_autopm_get_interface(data->intf);
@@ -2639,8 +2634,15 @@ static void btusb_mtk_claim_iso_intf(struct btusb_data *data)
struct btmtk_data *btmtk_data = hci_get_priv(data->hdev);
int err;
+ /*
+ * The function usb_driver_claim_interface() is documented to need
+ * locks held if it's not called from a probe routine. The code here
+ * is called from the hci_power_on workqueue, so grab the lock.
+ */
+ device_lock(&btmtk_data->isopkt_intf->dev);
err = usb_driver_claim_interface(&btusb_driver,
btmtk_data->isopkt_intf, data);
+ device_unlock(&btmtk_data->isopkt_intf->dev);
if (err < 0) {
btmtk_data->isopkt_intf = NULL;
bt_dev_err(data->hdev, "Failed to claim iso interface");
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 704e84d00639..0ee5c691fb36 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -17,7 +17,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
config ARM_AIROHA_SOC_CPUFREQ
tristate "Airoha EN7581 SoC CPUFreq support"
- depends on ARCH_AIROHA || COMPILE_TEST
+ depends on (ARCH_AIROHA && OF) || COMPILE_TEST
select PM_OPP
default ARCH_AIROHA
help
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 302df42d6887..463b69a2dff5 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -909,11 +909,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
pr_warn(FW_WARN "P-state 0 is not max freq\n");
- if (acpi_cpufreq_driver.set_boost) {
- set_boost(policy, acpi_cpufreq_driver.boost_enabled);
- policy->boost_enabled = acpi_cpufreq_driver.boost_enabled;
- }
-
return result;
err_unreg:
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 2486a6c5256a..8f512448382f 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -611,7 +611,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
* Section 8.4.7.1.1.5 of ACPI 6.1 spec)
*/
policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
- policy->max = cppc_perf_to_khz(caps, caps->nominal_perf);
+ policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ?
+ caps->highest_perf : caps->nominal_perf);
/*
* Set cpuinfo.min_freq to Lowest to make the full range of performance
@@ -619,7 +620,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
* nonlinear perf
*/
policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf);
- policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf);
+ policy->cpuinfo.max_freq = policy->max;
policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
policy->shared_type = cpu_data->shared_type;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1076e37a18ad..e0048856ecee 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1410,10 +1410,6 @@ static int cpufreq_online(unsigned int cpu)
goto out_free_policy;
}
- /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
- if (cpufreq_boost_enabled() && policy_has_boost_freq(policy))
- policy->boost_enabled = true;
-
/*
* The initialization has succeeded and the policy is online.
* If there is a problem with its frequency table, take it
@@ -1476,6 +1472,10 @@ static int cpufreq_online(unsigned int cpu)
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_CREATE_POLICY, policy);
+ } else {
+ ret = freq_qos_update_request(policy->max_freq_req, policy->max);
+ if (ret < 0)
+ goto out_destroy_policy;
}
if (cpufreq_driver->get && has_target()) {
@@ -1570,6 +1570,18 @@ static int cpufreq_online(unsigned int cpu)
if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
policy->cdev = of_cpufreq_cooling_register(policy);
+ /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
+ if (policy->boost_enabled != cpufreq_boost_enabled()) {
+ policy->boost_enabled = cpufreq_boost_enabled();
+ ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
+ if (ret) {
+ /* If the set_boost fails, the online operation is not affected */
+ pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu,
+ policy->boost_enabled ? "enable" : "disable");
+ policy->boost_enabled = !policy->boost_enabled;
+ }
+ }
+
pr_debug("initialization complete\n");
return 0;
diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c
index c6bdfc308e99..9cef71528076 100644
--- a/drivers/cpufreq/s3c64xx-cpufreq.c
+++ b/drivers/cpufreq/s3c64xx-cpufreq.c
@@ -24,6 +24,7 @@ struct s3c64xx_dvfs {
unsigned int vddarm_max;
};
+#ifdef CONFIG_REGULATOR
static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
[0] = { 1000000, 1150000 },
[1] = { 1050000, 1150000 },
@@ -31,6 +32,7 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
[3] = { 1200000, 1350000 },
[4] = { 1300000, 1350000 },
};
+#endif
static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
{ 0, 0, 66000 },
@@ -51,15 +53,16 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int index)
{
- struct s3c64xx_dvfs *dvfs;
- unsigned int old_freq, new_freq;
+ unsigned int new_freq = s3c64xx_freq_table[index].frequency;
int ret;
+#ifdef CONFIG_REGULATOR
+ struct s3c64xx_dvfs *dvfs;
+ unsigned int old_freq;
+
old_freq = clk_get_rate(policy->clk) / 1000;
- new_freq = s3c64xx_freq_table[index].frequency;
dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data];
-#ifdef CONFIG_REGULATOR
if (vddarm && new_freq > old_freq) {
ret = regulator_set_voltage(vddarm,
dvfs->vddarm_min,
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 173ddcac540a..8fe5e1b47ef9 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -41,11 +41,7 @@
* idle state 2, the third bin spans from the target residency of idle state 2
* up to, but not including, the target residency of idle state 3 and so on.
* The last bin spans from the target residency of the deepest idle state
- * supplied by the driver to the scheduler tick period length or to infinity if
- * the tick period length is less than the target residency of that state. In
- * the latter case, the governor also counts events with the measured idle
- * duration between the tick period length and the target residency of the
- * deepest idle state.
+ * supplied by the driver to infinity.
*
* Two metrics called "hits" and "intercepts" are associated with each bin.
* They are updated every time before selecting an idle state for the given CPU
@@ -60,6 +56,10 @@
* into by the sleep length (these events are also referred to as "intercepts"
* below).
*
+ * The governor also counts "intercepts" with the measured idle duration below
+ * the tick period length and uses this information when deciding whether or not
+ * to stop the scheduler tick.
+ *
* In order to select an idle state for a CPU, the governor takes the following
* steps (modulo the possible latency constraint that must be taken into account
* too):
@@ -106,6 +106,12 @@
#include "gov.h"
/*
+ * Idle state exit latency threshold used for deciding whether or not to check
+ * the time till the closest expected timer event.
+ */
+#define LATENCY_THRESHOLD_NS (RESIDENCY_THRESHOLD_NS / 2)
+
+/*
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
* is used for decreasing metrics on a regular basis.
*/
@@ -124,18 +130,20 @@ struct teo_bin {
/**
* struct teo_cpu - CPU data used by the TEO cpuidle governor.
- * @time_span_ns: Time between idle state selection and post-wakeup update.
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @state_bins: Idle state data bins for this CPU.
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
- * @tick_hits: Number of "hits" after TICK_NSEC.
+ * @tick_intercepts: "Intercepts" before TICK_NSEC.
+ * @short_idles: Wakeups after short idle periods.
+ * @artificial_wakeup: Set if the wakeup has been triggered by a safety net.
*/
struct teo_cpu {
- s64 time_span_ns;
s64 sleep_length_ns;
struct teo_bin state_bins[CPUIDLE_STATE_MAX];
unsigned int total;
- unsigned int tick_hits;
+ unsigned int tick_intercepts;
+ unsigned int short_idles;
+ bool artificial_wakeup;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -152,23 +160,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
s64 target_residency_ns;
u64 measured_ns;
- if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
+ cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
+
+ if (cpu_data->artificial_wakeup) {
/*
- * One of the safety nets has triggered or the wakeup was close
- * enough to the closest timer event expected at the idle state
- * selection time to be discarded.
+ * If one of the safety nets has triggered, assume that this
+ * might have been a long sleep.
*/
measured_ns = U64_MAX;
} else {
u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
- /*
- * The computations below are to determine whether or not the
- * (saved) time till the next timer event and the measured idle
- * duration fall into the same "bin", so use last_residency_ns
- * for that instead of time_span_ns which includes the cpuidle
- * overhead.
- */
measured_ns = dev->last_residency_ns;
/*
* The delay between the wakeup and the first instruction
@@ -176,14 +178,16 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* time, so take 1/2 of the exit latency as a very rough
* approximation of the average of it.
*/
- if (measured_ns >= lat_ns)
+ if (measured_ns >= lat_ns) {
measured_ns -= lat_ns / 2;
- else
+ if (measured_ns < RESIDENCY_THRESHOLD_NS)
+ cpu_data->short_idles += PULSE;
+ } else {
measured_ns /= 2;
+ cpu_data->short_idles += PULSE;
+ }
}
- cpu_data->total = 0;
-
/*
* Decay the "hits" and "intercepts" metrics for all of the bins and
* find the bins that the sleep length and the measured idle duration
@@ -195,8 +199,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
bin->hits -= bin->hits >> DECAY_SHIFT;
bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
- cpu_data->total += bin->hits + bin->intercepts;
-
target_residency_ns = drv->states[i].target_residency_ns;
if (target_residency_ns <= cpu_data->sleep_length_ns) {
@@ -206,38 +208,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
}
}
- /*
- * If the deepest state's target residency is below the tick length,
- * make a record of it to help teo_select() decide whether or not
- * to stop the tick. This effectively adds an extra hits-only bin
- * beyond the last state-related one.
- */
- if (target_residency_ns < TICK_NSEC) {
- cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
-
- cpu_data->total += cpu_data->tick_hits;
-
- if (TICK_NSEC <= cpu_data->sleep_length_ns) {
- idx_timer = drv->state_count;
- if (TICK_NSEC <= measured_ns) {
- cpu_data->tick_hits += PULSE;
- goto end;
- }
- }
- }
-
+ cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
/*
* If the measured idle duration falls into the same bin as the sleep
* length, this is a "hit", so update the "hits" metric for that bin.
* Otherwise, update the "intercepts" metric for the bin fallen into by
* the measured idle duration.
*/
- if (idx_timer == idx_duration)
+ if (idx_timer == idx_duration) {
cpu_data->state_bins[idx_timer].hits += PULSE;
- else
+ } else {
cpu_data->state_bins[idx_duration].intercepts += PULSE;
+ if (TICK_NSEC <= measured_ns)
+ cpu_data->tick_intercepts += PULSE;
+ }
-end:
+ cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
cpu_data->total += PULSE;
}
@@ -285,14 +271,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
ktime_t delta_tick = TICK_NSEC / 2;
- unsigned int tick_intercept_sum = 0;
unsigned int idx_intercept_sum = 0;
unsigned int intercept_sum = 0;
unsigned int idx_hit_sum = 0;
unsigned int hit_sum = 0;
int constraint_idx = 0;
int idx0 = 0, idx = -1;
- int prev_intercept_idx;
s64 duration_ns;
int i;
@@ -301,10 +285,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
dev->last_state_idx = -1;
}
- cpu_data->time_span_ns = local_clock();
/*
- * Set the expected sleep length to infinity in case of an early
- * return.
+ * Set the sleep length to infinity in case the invocation of
+ * tick_nohz_get_sleep_length() below is skipped, in which case it won't
+ * be known whether or not the subsequent wakeup is caused by a timer.
+ * It is generally fine to count the wakeup as an intercept then, except
+ * for the cases when the CPU is mostly woken up by timers and there may
+ * be opportunities to ask for a deeper idle state when no imminent
+ * timers are scheduled which may be missed.
*/
cpu_data->sleep_length_ns = KTIME_MAX;
@@ -360,17 +348,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
goto end;
}
- tick_intercept_sum = intercept_sum +
- cpu_data->state_bins[drv->state_count-1].intercepts;
-
/*
* If the sum of the intercepts metric for all of the idle states
* shallower than the current candidate one (idx) is greater than the
* sum of the intercepts and hits metrics for the candidate state and
- * all of the deeper states a shallower idle state is likely to be a
+ * all of the deeper states, a shallower idle state is likely to be a
* better choice.
*/
- prev_intercept_idx = idx;
if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
int first_suitable_idx = idx;
@@ -396,41 +380,38 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* first enabled state that is deep enough.
*/
if (teo_state_ok(i, drv) &&
- !dev->states_usage[i].disable)
+ !dev->states_usage[i].disable) {
idx = i;
- else
- idx = first_suitable_idx;
-
+ break;
+ }
+ idx = first_suitable_idx;
break;
}
if (dev->states_usage[i].disable)
continue;
- if (!teo_state_ok(i, drv)) {
+ if (teo_state_ok(i, drv)) {
/*
- * The current state is too shallow, but if an
- * alternative candidate state has been found,
- * it may still turn out to be a better choice.
+ * The current state is deep enough, but still
+ * there may be a better one.
*/
- if (first_suitable_idx != idx)
- continue;
-
- break;
+ first_suitable_idx = i;
+ continue;
}
- first_suitable_idx = i;
+ /*
+ * The current state is too shallow, so if no suitable
+ * states other than the initial candidate have been
+ * found, give up (the remaining states to check are
+ * shallower still), but otherwise the first suitable
+ * state other than the initial candidate may turn out
+ * to be preferable.
+ */
+ if (first_suitable_idx == idx)
+ break;
}
}
- if (!idx && prev_intercept_idx) {
- /*
- * We have to query the sleep length here otherwise we don't
- * know after wakeup if our guess was correct.
- */
- duration_ns = tick_nohz_get_sleep_length(&delta_tick);
- cpu_data->sleep_length_ns = duration_ns;
- goto out_tick;
- }
/*
* If there is a latency constraint, it may be necessary to select an
@@ -440,24 +421,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
idx = constraint_idx;
/*
- * Skip the timers check if state 0 is the current candidate one,
- * because an immediate non-timer wakeup is expected in that case.
+ * If either the candidate state is state 0 or its target residency is
+ * low enough, there is basically nothing more to do, but if the sleep
+ * length is not updated, the subsequent wakeup will be counted as an
+ * "intercept" which may be problematic in the cases when timer wakeups
+ * are dominant. Namely, it may effectively prevent deeper idle states
+ * from being selected at one point even if no imminent timers are
+ * scheduled.
+ *
+ * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
+ * CPU are unlikely (user space has a default 50 us slack value for
+ * hrtimers and there are relatively few timers with a lower deadline
+ * value in the kernel), and even if they did happen, the potential
+ * benefit from using a deep idle state in that case would be
+ * questionable anyway for latency reasons. Thus if the measured idle
+ * duration falls into that range in the majority of cases, assume
+ * non-timer wakeups to be dominant and skip updating the sleep length
+ * to reduce latency.
+ *
+ * Also, if the latency constraint is sufficiently low, it will force
+ * shallow idle states regardless of the wakeup type, so the sleep
+ * length need not be known in that case.
*/
- if (!idx)
- goto out_tick;
-
- /*
- * If state 0 is a polling one, check if the target residency of
- * the current candidate state is low enough and skip the timers
- * check in that case too.
- */
- if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
- drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+ if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
+ (2 * cpu_data->short_idles >= cpu_data->total ||
+ latency_req < LATENCY_THRESHOLD_NS))
goto out_tick;
duration_ns = tick_nohz_get_sleep_length(&delta_tick);
cpu_data->sleep_length_ns = duration_ns;
+ if (!idx)
+ goto out_tick;
+
/*
* If the closest expected timer is before the target residency of the
* candidate state, a shallower one needs to be found.
@@ -474,7 +470,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* total wakeup events, do not stop the tick.
*/
if (drv->states[idx].target_residency_ns < TICK_NSEC &&
- tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+ cpu_data->tick_intercepts > cpu_data->total / 2 + cpu_data->total / 8)
duration_ns = TICK_NSEC / 2;
end:
@@ -511,17 +507,16 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
dev->last_state_idx = state;
- /*
- * If the wakeup was not "natural", but triggered by one of the safety
- * nets, assume that the CPU might have been idle for the entire sleep
- * length time.
- */
if (dev->poll_time_limit ||
(tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
+ /*
+ * The wakeup was not "genuine", but triggered by one of the
+ * safety nets.
+ */
dev->poll_time_limit = false;
- cpu_data->time_span_ns = cpu_data->sleep_length_ns;
+ cpu_data->artificial_wakeup = true;
} else {
- cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
+ cpu_data->artificial_wakeup = false;
}
}
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 4cb455b2bdee..619b6fb9d833 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -490,8 +490,7 @@ static int mxc_gpio_probe(struct platform_device *pdev)
port->gc.request = mxc_gpio_request;
port->gc.free = mxc_gpio_free;
port->gc.to_irq = mxc_gpio_to_irq;
- port->gc.base = (pdev->id < 0) ? of_alias_get_id(np, "gpio") * 32 :
- pdev->id * 32;
+ port->gc.base = of_alias_get_id(np, "gpio") * 32;
err = devm_gpiochip_add_data(&pdev->dev, &port->gc, port);
if (err)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7b78c2bada81..e45bba240cbc 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1538,17 +1538,20 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
NETIF_F_HIGHDMA | NETIF_F_LRO)
#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE)
+ NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_GSO_PARTIAL)
#define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
NETIF_F_GSO_SOFTWARE)
+#define BOND_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP)
+
static void bond_compute_features(struct bonding *bond)
{
+ netdev_features_t gso_partial_features = BOND_GSO_PARTIAL_FEATURES;
unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
IFF_XMIT_DST_RELEASE_PERM;
- netdev_features_t gso_partial_features = NETIF_F_GSO_ESP;
netdev_features_t vlan_features = BOND_VLAN_FEATURES;
netdev_features_t enc_features = BOND_ENC_FEATURES;
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1582,8 +1585,9 @@ static void bond_compute_features(struct bonding *bond)
BOND_XFRM_FEATURES);
#endif /* CONFIG_XFRM_OFFLOAD */
- if (slave->dev->hw_enc_features & NETIF_F_GSO_PARTIAL)
- gso_partial_features &= slave->dev->gso_partial_features;
+ gso_partial_features = netdev_increment_features(gso_partial_features,
+ slave->dev->gso_partial_features,
+ BOND_GSO_PARTIAL_FEATURES);
mpls_features = netdev_increment_features(mpls_features,
slave->dev->mpls_features,
@@ -1598,12 +1602,8 @@ static void bond_compute_features(struct bonding *bond)
}
bond_dev->hard_header_len = max_hard_header_len;
- if (gso_partial_features & NETIF_F_GSO_ESP)
- bond_dev->gso_partial_features |= NETIF_F_GSO_ESP;
- else
- bond_dev->gso_partial_features &= ~NETIF_F_GSO_ESP;
-
done:
+ bond_dev->gso_partial_features = gso_partial_features;
bond_dev->vlan_features = vlan_features;
bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
NETIF_F_HW_VLAN_CTAG_TX |
@@ -6046,6 +6046,7 @@ void bond_setup(struct net_device *bond_dev)
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
bond_dev->features |= bond_dev->hw_features;
bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ bond_dev->features |= NETIF_F_GSO_PARTIAL;
#ifdef CONFIG_XFRM_OFFLOAD
bond_dev->hw_features |= BOND_XFRM_FEATURES;
/* Only enable XFRM features if this is an active-backup config */
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index d73ef262991d..6fee9a41839c 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -328,8 +328,7 @@
#define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */
#define BGMAC_RX_BUF_OFFSET (NET_SKB_PAD + NET_IP_ALIGN - \
BGMAC_RX_FRAME_OFFSET)
-/* Jumbo frame size with FCS */
-#define BGMAC_RX_MAX_FRAME_SIZE 9724
+#define BGMAC_RX_MAX_FRAME_SIZE 1536
#define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 9cc8db10a8d6..1c94bf1db718 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7424,7 +7424,7 @@ static void tg3_napi_enable(struct tg3 *tp)
for (i = 0; i < tp->irq_cnt; i++) {
tnapi = &tp->napi[i];
- napi_enable(&tnapi->napi);
+ napi_enable_locked(&tnapi->napi);
if (tnapi->tx_buffers) {
netif_queue_set_napi(tp->dev, txq_idx,
NETDEV_QUEUE_TYPE_TX,
@@ -7445,9 +7445,10 @@ static void tg3_napi_init(struct tg3 *tp)
int i;
for (i = 0; i < tp->irq_cnt; i++) {
- netif_napi_add(tp->dev, &tp->napi[i].napi,
- i ? tg3_poll_msix : tg3_poll);
- netif_napi_set_irq(&tp->napi[i].napi, tp->napi[i].irq_vec);
+ netif_napi_add_locked(tp->dev, &tp->napi[i].napi,
+ i ? tg3_poll_msix : tg3_poll);
+ netif_napi_set_irq_locked(&tp->napi[i].napi,
+ tp->napi[i].irq_vec);
}
}
@@ -11259,6 +11260,8 @@ static void tg3_timer_stop(struct tg3 *tp)
static int tg3_restart_hw(struct tg3 *tp, bool reset_phy)
__releases(tp->lock)
__acquires(tp->lock)
+ __releases(tp->dev->lock)
+ __acquires(tp->dev->lock)
{
int err;
@@ -11271,7 +11274,9 @@ static int tg3_restart_hw(struct tg3 *tp, bool reset_phy)
tg3_timer_stop(tp);
tp->irq_sync = 0;
tg3_napi_enable(tp);
+ netdev_unlock(tp->dev);
dev_close(tp->dev);
+ netdev_lock(tp->dev);
tg3_full_lock(tp, 0);
}
return err;
@@ -11299,6 +11304,7 @@ static void tg3_reset_task(struct work_struct *work)
tg3_netif_stop(tp);
+ netdev_lock(tp->dev);
tg3_full_lock(tp, 1);
if (tg3_flag(tp, TX_RECOVERY_PENDING)) {
@@ -11318,12 +11324,14 @@ static void tg3_reset_task(struct work_struct *work)
* call cancel_work_sync() and wait forever.
*/
tg3_flag_clear(tp, RESET_TASK_PENDING);
+ netdev_unlock(tp->dev);
dev_close(tp->dev);
goto out;
}
tg3_netif_start(tp);
tg3_full_unlock(tp);
+ netdev_unlock(tp->dev);
tg3_phy_start(tp);
tg3_flag_clear(tp, RESET_TASK_PENDING);
out:
@@ -11683,9 +11691,11 @@ static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq,
if (err)
goto out_ints_fini;
+ netdev_lock(dev);
tg3_napi_init(tp);
tg3_napi_enable(tp);
+ netdev_unlock(dev);
for (i = 0; i < tp->irq_cnt; i++) {
err = tg3_request_irq(tp, i);
@@ -12569,6 +12579,7 @@ static int tg3_set_ringparam(struct net_device *dev,
irq_sync = 1;
}
+ netdev_lock(dev);
tg3_full_lock(tp, irq_sync);
tp->rx_pending = ering->rx_pending;
@@ -12597,6 +12608,7 @@ static int tg3_set_ringparam(struct net_device *dev,
}
tg3_full_unlock(tp);
+ netdev_unlock(dev);
if (irq_sync && !err)
tg3_phy_start(tp);
@@ -12678,6 +12690,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
irq_sync = 1;
}
+ netdev_lock(dev);
tg3_full_lock(tp, irq_sync);
if (epause->autoneg)
@@ -12707,6 +12720,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
}
tg3_full_unlock(tp);
+ netdev_unlock(dev);
}
tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED;
@@ -13911,6 +13925,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
data[TG3_INTERRUPT_TEST] = 1;
}
+ netdev_lock(dev);
tg3_full_lock(tp, 0);
tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
@@ -13922,6 +13937,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
}
tg3_full_unlock(tp);
+ netdev_unlock(dev);
if (irq_sync && !err2)
tg3_phy_start(tp);
@@ -14365,6 +14381,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
tg3_set_mtu(dev, tp, new_mtu);
+ netdev_lock(dev);
tg3_full_lock(tp, 1);
tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
@@ -14384,6 +14401,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
tg3_netif_start(tp);
tg3_full_unlock(tp);
+ netdev_unlock(dev);
if (!err)
tg3_phy_start(tp);
@@ -18164,6 +18182,7 @@ static int tg3_resume(struct device *device)
netif_device_attach(dev);
+ netdev_lock(dev);
tg3_full_lock(tp, 0);
tg3_ape_driver_state_change(tp, RESET_KIND_INIT);
@@ -18180,6 +18199,7 @@ static int tg3_resume(struct device *device)
out:
tg3_full_unlock(tp);
+ netdev_unlock(dev);
if (!err)
tg3_phy_start(tp);
@@ -18260,7 +18280,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
done:
if (state == pci_channel_io_perm_failure) {
if (netdev) {
+ netdev_lock(netdev);
tg3_napi_enable(tp);
+ netdev_unlock(netdev);
dev_close(netdev);
}
err = PCI_ERS_RESULT_DISCONNECT;
@@ -18314,7 +18336,9 @@ static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev)
done:
if (rc != PCI_ERS_RESULT_RECOVERED && netdev && netif_running(netdev)) {
+ netdev_lock(netdev);
tg3_napi_enable(tp);
+ netdev_unlock(netdev);
dev_close(netdev);
}
rtnl_unlock();
@@ -18340,12 +18364,14 @@ static void tg3_io_resume(struct pci_dev *pdev)
if (!netdev || !netif_running(netdev))
goto done;
+ netdev_lock(netdev);
tg3_full_lock(tp, 0);
tg3_ape_driver_state_change(tp, RESET_KIND_INIT);
tg3_flag_set(tp, INIT_COMPLETE);
err = tg3_restart_hw(tp, true);
if (err) {
tg3_full_unlock(tp);
+ netdev_unlock(netdev);
netdev_err(netdev, "Cannot restart hardware after reset.\n");
goto done;
}
@@ -18357,6 +18383,7 @@ static void tg3_io_resume(struct pci_dev *pdev)
tg3_netif_start(tp);
tg3_full_unlock(tp);
+ netdev_unlock(netdev);
tg3_phy_start(tp);
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 8735e333034c..b87eaf0c250c 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1777,10 +1777,11 @@ static void dm9000_drv_remove(struct platform_device *pdev)
unregister_netdev(ndev);
dm9000_release_board(pdev, dm);
- free_netdev(ndev); /* free device structure */
if (dm->power_supply)
regulator_disable(dm->power_supply);
+ free_netdev(ndev); /* free device structure */
+
dev_dbg(&pdev->dev, "released and freed device\n");
}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 68725506a095..f7c4ce8e9a26 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -840,6 +840,8 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
struct fec_enet_private *fep = netdev_priv(ndev);
int hdr_len, total_len, data_left;
struct bufdesc *bdp = txq->bd.cur;
+ struct bufdesc *tmp_bdp;
+ struct bufdesc_ex *ebdp;
struct tso_t tso;
unsigned int index = 0;
int ret;
@@ -913,7 +915,34 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
return 0;
err_release:
- /* TODO: Release all used data descriptors for TSO */
+ /* Release all used data descriptors for TSO */
+ tmp_bdp = txq->bd.cur;
+
+ while (tmp_bdp != bdp) {
+ /* Unmap data buffers */
+ if (tmp_bdp->cbd_bufaddr &&
+ !IS_TSO_HEADER(txq, fec32_to_cpu(tmp_bdp->cbd_bufaddr)))
+ dma_unmap_single(&fep->pdev->dev,
+ fec32_to_cpu(tmp_bdp->cbd_bufaddr),
+ fec16_to_cpu(tmp_bdp->cbd_datlen),
+ DMA_TO_DEVICE);
+
+ /* Clear standard buffer descriptor fields */
+ tmp_bdp->cbd_sc = 0;
+ tmp_bdp->cbd_datlen = 0;
+ tmp_bdp->cbd_bufaddr = 0;
+
+ /* Handle extended descriptor if enabled */
+ if (fep->bufdesc_ex) {
+ ebdp = (struct bufdesc_ex *)tmp_bdp;
+ ebdp->cbd_esc = 0;
+ }
+
+ tmp_bdp = fec_enet_get_nextdesc(tmp_bdp, &txq->bd);
+ }
+
+ dev_kfree_skb_any(skb);
+
return ret;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index 9a63fbc69408..b25fb400f476 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -40,6 +40,21 @@ EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare);
*/
static DEFINE_MUTEX(hnae3_common_lock);
+/* ensure the drivers being unloaded one by one */
+static DEFINE_MUTEX(hnae3_unload_lock);
+
+void hnae3_acquire_unload_lock(void)
+{
+ mutex_lock(&hnae3_unload_lock);
+}
+EXPORT_SYMBOL(hnae3_acquire_unload_lock);
+
+void hnae3_release_unload_lock(void)
+{
+ mutex_unlock(&hnae3_unload_lock);
+}
+EXPORT_SYMBOL(hnae3_release_unload_lock);
+
static bool hnae3_client_match(enum hnae3_client_type client_type)
{
if (client_type == HNAE3_CLIENT_KNIC ||
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 12ba380eb701..4e44f28288f9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -963,4 +963,6 @@ int hnae3_register_client(struct hnae3_client *client);
void hnae3_set_client_init_flag(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev,
unsigned int inited);
+void hnae3_acquire_unload_lock(void);
+void hnae3_release_unload_lock(void);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index a7e3b22f641c..9ff797fb36c4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -6002,9 +6002,11 @@ module_init(hns3_init_module);
*/
static void __exit hns3_exit_module(void)
{
+ hnae3_acquire_unload_lock();
pci_unregister_driver(&hns3_driver);
hnae3_unregister_client(&client);
hns3_dbg_unregister_debugfs();
+ hnae3_release_unload_lock();
}
module_exit(hns3_exit_module);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index db7845009252..3f17b3073e50 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -12919,9 +12919,11 @@ static int __init hclge_init(void)
static void __exit hclge_exit(void)
{
+ hnae3_acquire_unload_lock();
hnae3_unregister_ae_algo_prepare(&ae_algo);
hnae3_unregister_ae_algo(&ae_algo);
destroy_workqueue(hclge_wq);
+ hnae3_release_unload_lock();
}
module_init(hclge_init);
module_exit(hclge_exit);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 163c6e59ea4c..9ba767740a04 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -3410,8 +3410,10 @@ static int __init hclgevf_init(void)
static void __exit hclgevf_exit(void)
{
+ hnae3_acquire_unload_lock();
hnae3_unregister_ae_algo(&ae_algovf);
destroy_workqueue(hclgevf_wq);
+ hnae3_release_unload_lock();
}
module_init(hclgevf_init);
module_exit(hclgevf_exit);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index cbfaaa5b7d02..2d7a18fcc3be 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -773,6 +773,11 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter,
f->state = IAVF_VLAN_ADD;
adapter->num_vlan_filters++;
iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER);
+ } else if (f->state == IAVF_VLAN_REMOVE) {
+ /* IAVF_VLAN_REMOVE means that VLAN wasn't yet removed.
+ * We can safely only change the state here.
+ */
+ f->state = IAVF_VLAN_ACTIVE;
}
clearout:
@@ -793,8 +798,18 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan)
f = iavf_find_vlan(adapter, vlan);
if (f) {
- f->state = IAVF_VLAN_REMOVE;
- iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER);
+ /* IAVF_ADD_VLAN means that VLAN wasn't even added yet.
+ * Remove it from the list.
+ */
+ if (f->state == IAVF_VLAN_ADD) {
+ list_del(&f->list);
+ kfree(f);
+ adapter->num_vlan_filters--;
+ } else {
+ f->state = IAVF_VLAN_REMOVE;
+ iavf_schedule_aq_request(adapter,
+ IAVF_FLAG_AQ_DEL_VLAN_FILTER);
+ }
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 01536a382e54..bdee499f991a 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1498,7 +1498,6 @@ struct ice_aqc_dnl_equa_param {
#define ICE_AQC_RX_EQU_POST1 (0x12 << ICE_AQC_RX_EQU_SHIFT)
#define ICE_AQC_RX_EQU_BFLF (0x13 << ICE_AQC_RX_EQU_SHIFT)
#define ICE_AQC_RX_EQU_BFHF (0x14 << ICE_AQC_RX_EQU_SHIFT)
-#define ICE_AQC_RX_EQU_DRATE (0x15 << ICE_AQC_RX_EQU_SHIFT)
#define ICE_AQC_RX_EQU_CTLE_GAINHF (0x20 << ICE_AQC_RX_EQU_SHIFT)
#define ICE_AQC_RX_EQU_CTLE_GAINLF (0x21 << ICE_AQC_RX_EQU_SHIFT)
#define ICE_AQC_RX_EQU_CTLE_GAINDC (0x22 << ICE_AQC_RX_EQU_SHIFT)
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 3072634bf049..f241493a6ac8 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -710,7 +710,6 @@ static int ice_get_tx_rx_equa(struct ice_hw *hw, u8 serdes_num,
{ ICE_AQC_RX_EQU_POST1, rx, &ptr->rx_equ_post1 },
{ ICE_AQC_RX_EQU_BFLF, rx, &ptr->rx_equ_bflf },
{ ICE_AQC_RX_EQU_BFHF, rx, &ptr->rx_equ_bfhf },
- { ICE_AQC_RX_EQU_DRATE, rx, &ptr->rx_equ_drate },
{ ICE_AQC_RX_EQU_CTLE_GAINHF, rx, &ptr->rx_equ_ctle_gainhf },
{ ICE_AQC_RX_EQU_CTLE_GAINLF, rx, &ptr->rx_equ_ctle_gainlf },
{ ICE_AQC_RX_EQU_CTLE_GAINDC, rx, &ptr->rx_equ_ctle_gaindc },
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h
index 8f2ad1c172c0..23b2cfbc9684 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.h
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h
@@ -15,7 +15,6 @@ struct ice_serdes_equalization_to_ethtool {
int rx_equ_post1;
int rx_equ_bflf;
int rx_equ_bfhf;
- int rx_equ_drate;
int rx_equ_ctle_gainhf;
int rx_equ_ctle_gainlf;
int rx_equ_ctle_gaindc;
diff --git a/drivers/net/ethernet/intel/ice/ice_parser.h b/drivers/net/ethernet/intel/ice/ice_parser.h
index 6509d807627c..4f56d53d56b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_parser.h
+++ b/drivers/net/ethernet/intel/ice/ice_parser.h
@@ -257,7 +257,6 @@ ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *table, int size,
/*** ICE_SID_RXPARSER_BOOST_TCAM and ICE_SID_LBL_RXPARSER_TMEM sections ***/
#define ICE_BST_TCAM_TABLE_SIZE 256
#define ICE_BST_TCAM_KEY_SIZE 20
-#define ICE_BST_KEY_TCAM_SIZE 19
/* Boost TCAM item */
struct ice_bst_tcam_item {
@@ -401,7 +400,6 @@ u16 ice_xlt_kb_flag_get(struct ice_xlt_kb *kb, u64 pkt_flag);
#define ICE_PARSER_GPR_NUM 128
#define ICE_PARSER_FLG_NUM 64
#define ICE_PARSER_ERR_NUM 16
-#define ICE_BST_KEY_SIZE 10
#define ICE_MARKER_ID_SIZE 9
#define ICE_MARKER_MAX_SIZE \
(ICE_MARKER_ID_SIZE * BITS_PER_BYTE - 1)
@@ -431,13 +429,13 @@ struct ice_parser_rt {
u8 pkt_buf[ICE_PARSER_MAX_PKT_LEN + ICE_PARSER_PKT_REV];
u16 pkt_len;
u16 po;
- u8 bst_key[ICE_BST_KEY_SIZE];
+ u8 bst_key[ICE_BST_TCAM_KEY_SIZE];
struct ice_pg_cam_key pg_key;
+ u8 pg_prio;
struct ice_alu *alu0;
struct ice_alu *alu1;
struct ice_alu *alu2;
struct ice_pg_cam_action *action;
- u8 pg_prio;
struct ice_gpr_pu pu;
u8 markers[ICE_MARKER_ID_SIZE];
bool protocols[ICE_PO_PAIR_SIZE];
diff --git a/drivers/net/ethernet/intel/ice/ice_parser_rt.c b/drivers/net/ethernet/intel/ice/ice_parser_rt.c
index dedf5e854e4b..3995d662e050 100644
--- a/drivers/net/ethernet/intel/ice/ice_parser_rt.c
+++ b/drivers/net/ethernet/intel/ice/ice_parser_rt.c
@@ -125,22 +125,20 @@ static void ice_bst_key_init(struct ice_parser_rt *rt,
else
key[idd] = imem->b_kb.prio;
- idd = ICE_BST_KEY_TCAM_SIZE - 1;
+ idd = ICE_BST_TCAM_KEY_SIZE - 2;
for (i = idd; i >= 0; i--) {
int j;
j = ho + idd - i;
if (j < ICE_PARSER_MAX_PKT_LEN)
- key[i] = rt->pkt_buf[ho + idd - i];
+ key[i] = rt->pkt_buf[j];
else
key[i] = 0;
}
- ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generated Boost TCAM Key:\n");
- ice_debug(rt->psr->hw, ICE_DBG_PARSER, "%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
- key[0], key[1], key[2], key[3], key[4],
- key[5], key[6], key[7], key[8], key[9]);
- ice_debug(rt->psr->hw, ICE_DBG_PARSER, "\n");
+ ice_debug_array_w_prefix(rt->psr->hw, ICE_DBG_PARSER,
+ KBUILD_MODNAME ": Generated Boost TCAM Key",
+ key, ICE_BST_TCAM_KEY_SIZE);
}
static u16 ice_bit_rev_u16(u16 v, int len)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
index 4849590a5591..b28991dd1870 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
@@ -376,6 +376,9 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD))
break;
+ /* Ensure no other fields are read until DD flag is checked */
+ dma_rmb();
+
/* strip off FW internal code */
desc_err = le16_to_cpu(desc->ret_val) & 0xff;
@@ -563,6 +566,9 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
if (!(flags & IDPF_CTLQ_FLAG_DD))
break;
+ /* Ensure no other fields are read until DD flag is checked */
+ dma_rmb();
+
q_msg[i].vmvf_type = (flags &
(IDPF_CTLQ_FLAG_FTYPE_VM |
IDPF_CTLQ_FLAG_FTYPE_PF)) >>
diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c
index f71d3182580b..b6c515d14cbf 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_main.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_main.c
@@ -174,7 +174,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_master(pdev);
pci_set_drvdata(pdev, adapter);
- adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0,
+ adapter->init_wq = alloc_workqueue("%s-%s-init",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->init_wq) {
@@ -183,7 +184,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_free;
}
- adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0,
+ adapter->serv_wq = alloc_workqueue("%s-%s-service",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->serv_wq) {
@@ -192,7 +194,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_serv_wq_alloc;
}
- adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0,
+ adapter->mbx_wq = alloc_workqueue("%s-%s-mbx",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->mbx_wq) {
@@ -201,7 +204,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_mbx_wq_alloc;
}
- adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0,
+ adapter->stats_wq = alloc_workqueue("%s-%s-stats",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->stats_wq) {
@@ -210,7 +214,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_stats_wq_alloc;
}
- adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0,
+ adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
dev_driver_string(dev),
dev_name(dev));
if (!adapter->vc_event_wq) {
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index d46c95f91b0d..3d2413b8684f 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -517,8 +517,10 @@ static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter,
retval = -ENXIO;
goto only_unlock;
case IDPF_VC_XN_WAITING:
- dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n",
- params->vc_op, params->timeout_ms);
+ dev_notice_ratelimited(&adapter->pdev->dev,
+ "Transaction timed-out (op:%d cookie:%04x vc_op:%d salt:%02x timeout:%dms)\n",
+ params->vc_op, cookie, xn->vc_op,
+ xn->salt, params->timeout_ms);
retval = -ETIME;
break;
case IDPF_VC_XN_COMPLETED_SUCCESS:
@@ -612,14 +614,16 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter,
return -EINVAL;
}
xn = &adapter->vcxn_mngr->ring[xn_idx];
+ idpf_vc_xn_lock(xn);
salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info);
if (xn->salt != salt) {
- dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n",
- xn->salt, salt);
+ dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (exp:%d@%02x(%d) != got:%d@%02x)\n",
+ xn->vc_op, xn->salt, xn->state,
+ ctlq_msg->cookie.mbx.chnl_opcode, salt);
+ idpf_vc_xn_unlock(xn);
return -EINVAL;
}
- idpf_vc_xn_lock(xn);
switch (xn->state) {
case IDPF_VC_XN_WAITING:
/* success */
@@ -3077,12 +3081,21 @@ init_failed:
*/
void idpf_vc_core_deinit(struct idpf_adapter *adapter)
{
+ bool remove_in_prog;
+
if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags))
return;
+ /* Avoid transaction timeouts when called during reset */
+ remove_in_prog = test_bit(IDPF_REMOVE_IN_PROG, adapter->flags);
+ if (!remove_in_prog)
+ idpf_vc_xn_shutdown(adapter->vcxn_mngr);
+
idpf_deinit_task(adapter);
idpf_intr_rel(adapter);
- idpf_vc_xn_shutdown(adapter->vcxn_mngr);
+
+ if (remove_in_prog)
+ idpf_vc_xn_shutdown(adapter->vcxn_mngr);
cancel_delayed_work_sync(&adapter->serv_task);
cancel_delayed_work_sync(&adapter->mbx_task);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 82f4333fb426..4fe121b9f94b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4432,6 +4432,7 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
*/
if (pp->is_stopped) {
spin_unlock(&pp->lock);
+ netdev_unlock(port->napi.dev);
return 0;
}
netif_tx_stop_all_queues(pp->dev);
diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c
index 415d784de741..09f448f29124 100644
--- a/drivers/net/ethernet/mediatek/airoha_eth.c
+++ b/drivers/net/ethernet/mediatek/airoha_eth.c
@@ -266,11 +266,11 @@
#define REG_GDM3_FWD_CFG GDM3_BASE
#define GDM3_PAD_EN_MASK BIT(28)
-#define REG_GDM4_FWD_CFG (GDM4_BASE + 0x100)
+#define REG_GDM4_FWD_CFG GDM4_BASE
#define GDM4_PAD_EN_MASK BIT(28)
#define GDM4_SPORT_OFFSET0_MASK GENMASK(11, 8)
-#define REG_GDM4_SRC_PORT_SET (GDM4_BASE + 0x33c)
+#define REG_GDM4_SRC_PORT_SET (GDM4_BASE + 0x23c)
#define GDM4_SPORT_OFF2_MASK GENMASK(19, 16)
#define GDM4_SPORT_OFF1_MASK GENMASK(15, 12)
#define GDM4_SPORT_OFF0_MASK GENMASK(11, 8)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bd41b75d246e..a814b63ed97e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2087,7 +2087,7 @@ static struct mlx5e_xdpsq *mlx5e_open_xdpredirect_sq(struct mlx5e_channel *c,
struct mlx5e_xdpsq *xdpsq;
int err;
- xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, c->cpu);
+ xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, cpu_to_node(c->cpu));
if (!xdpsq)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 720f577929db..499e5e39d513 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1120,20 +1120,6 @@ static void nv_disable_hw_interrupts(struct net_device *dev, u32 mask)
}
}
-static void nv_napi_enable(struct net_device *dev)
-{
- struct fe_priv *np = get_nvpriv(dev);
-
- napi_enable(&np->napi);
-}
-
-static void nv_napi_disable(struct net_device *dev)
-{
- struct fe_priv *np = get_nvpriv(dev);
-
- napi_disable(&np->napi);
-}
-
#define MII_READ (-1)
/* mii_rw: read/write a register on the PHY.
*
@@ -3114,7 +3100,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
* Changing the MTU is a rare event, it shouldn't matter.
*/
nv_disable_irq(dev);
- nv_napi_disable(dev);
+ napi_disable(&np->napi);
netif_tx_lock_bh(dev);
netif_addr_lock(dev);
spin_lock(&np->lock);
@@ -3143,7 +3129,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
spin_unlock(&np->lock);
netif_addr_unlock(dev);
netif_tx_unlock_bh(dev);
- nv_napi_enable(dev);
+ napi_enable(&np->napi);
nv_enable_irq(dev);
}
return 0;
@@ -4731,7 +4717,7 @@ static int nv_set_ringparam(struct net_device *dev,
if (netif_running(dev)) {
nv_disable_irq(dev);
- nv_napi_disable(dev);
+ napi_disable(&np->napi);
netif_tx_lock_bh(dev);
netif_addr_lock(dev);
spin_lock(&np->lock);
@@ -4784,7 +4770,7 @@ static int nv_set_ringparam(struct net_device *dev,
spin_unlock(&np->lock);
netif_addr_unlock(dev);
netif_tx_unlock_bh(dev);
- nv_napi_enable(dev);
+ napi_enable(&np->napi);
nv_enable_irq(dev);
}
return 0;
@@ -5277,7 +5263,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
if (test->flags & ETH_TEST_FL_OFFLINE) {
if (netif_running(dev)) {
netif_stop_queue(dev);
- nv_napi_disable(dev);
+ napi_disable(&np->napi);
netif_tx_lock_bh(dev);
netif_addr_lock(dev);
spin_lock_irq(&np->lock);
@@ -5334,7 +5320,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
/* restart rx engine */
nv_start_rxtx(dev);
netif_start_queue(dev);
- nv_napi_enable(dev);
+ napi_enable(&np->napi);
nv_enable_hw_interrupts(dev, np->irqmask);
}
}
@@ -5576,6 +5562,7 @@ static int nv_open(struct net_device *dev)
/* ask for interrupts */
nv_enable_hw_interrupts(dev, np->irqmask);
+ netdev_lock(dev);
spin_lock_irq(&np->lock);
writel(NVREG_MCASTADDRA_FORCE, base + NvRegMulticastAddrA);
writel(0, base + NvRegMulticastAddrB);
@@ -5594,7 +5581,7 @@ static int nv_open(struct net_device *dev)
ret = nv_update_linkspeed(dev);
nv_start_rxtx(dev);
netif_start_queue(dev);
- nv_napi_enable(dev);
+ napi_enable_locked(&np->napi);
if (ret) {
netif_carrier_on(dev);
@@ -5611,6 +5598,7 @@ static int nv_open(struct net_device *dev)
round_jiffies(jiffies + STATS_INTERVAL));
spin_unlock_irq(&np->lock);
+ netdev_unlock(dev);
/* If the loopback feature was set while the device was down, make sure
* that it's set correctly now.
@@ -5632,7 +5620,7 @@ static int nv_close(struct net_device *dev)
spin_lock_irq(&np->lock);
np->in_shutdown = 1;
spin_unlock_irq(&np->lock);
- nv_napi_disable(dev);
+ napi_disable(&np->napi);
synchronize_irq(np->pci_dev->irq);
del_timer_sync(&np->oom_kick);
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 9ce0e8a64ba8..a73dcaffa8c5 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -1684,6 +1684,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work)
if (tmp8 & CmdTxEnb)
RTL_W8 (ChipCmd, CmdRxEnb);
+ netdev_lock(dev);
spin_lock_bh(&tp->rx_lock);
/* Disable interrupts by clearing the interrupt mask. */
RTL_W16 (IntrMask, 0x0000);
@@ -1694,11 +1695,12 @@ static void rtl8139_tx_timeout_task (struct work_struct *work)
spin_unlock_irq(&tp->lock);
/* ...and finally, reset everything */
- napi_enable(&tp->napi);
+ napi_enable_locked(&tp->napi);
rtl8139_hw_start(dev);
netif_wake_queue(dev);
spin_unlock_bh(&tp->rx_lock);
+ netdev_unlock(dev);
}
static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index bc395294a32d..c9f4976a3527 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -3217,10 +3217,15 @@ static int ravb_suspend(struct device *dev)
netif_device_detach(ndev);
- if (priv->wol_enabled)
- return ravb_wol_setup(ndev);
+ rtnl_lock();
+ if (priv->wol_enabled) {
+ ret = ravb_wol_setup(ndev);
+ rtnl_unlock();
+ return ret;
+ }
ret = ravb_close(ndev);
+ rtnl_unlock();
if (ret)
return ret;
@@ -3245,19 +3250,20 @@ static int ravb_resume(struct device *dev)
if (!netif_running(ndev))
return 0;
+ rtnl_lock();
/* If WoL is enabled restore the interface. */
- if (priv->wol_enabled) {
+ if (priv->wol_enabled)
ret = ravb_wol_restore(ndev);
- if (ret)
- return ret;
- } else {
+ else
ret = pm_runtime_force_resume(dev);
- if (ret)
- return ret;
+ if (ret) {
+ rtnl_unlock();
+ return ret;
}
/* Reopening the interface will restore the device to the working state. */
ret = ravb_open(ndev);
+ rtnl_unlock();
if (ret < 0)
goto out_rpm_put;
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 8887b8921009..5fc8027c92c7 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3494,10 +3494,12 @@ static int sh_eth_suspend(struct device *dev)
netif_device_detach(ndev);
+ rtnl_lock();
if (mdp->wol_enabled)
ret = sh_eth_wol_setup(ndev);
else
ret = sh_eth_close(ndev);
+ rtnl_unlock();
return ret;
}
@@ -3511,10 +3513,12 @@ static int sh_eth_resume(struct device *dev)
if (!netif_running(ndev))
return 0;
+ rtnl_lock();
if (mdp->wol_enabled)
ret = sh_eth_wol_restore(ndev);
else
ret = sh_eth_open(ndev);
+ rtnl_unlock();
if (ret < 0)
return ret;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5212dc439b1d..d04543e5697b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2424,11 +2424,6 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
u32 chan = 0;
u8 qmode = 0;
- if (rxfifosz == 0)
- rxfifosz = priv->dma_cap.rx_fifo_size;
- if (txfifosz == 0)
- txfifosz = priv->dma_cap.tx_fifo_size;
-
/* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */
if (priv->plat->has_gmac4 || priv->plat->has_xgmac) {
rxfifosz /= rx_channels_count;
@@ -2897,11 +2892,6 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
int rxfifosz = priv->plat->rx_fifo_size;
int txfifosz = priv->plat->tx_fifo_size;
- if (rxfifosz == 0)
- rxfifosz = priv->dma_cap.rx_fifo_size;
- if (txfifosz == 0)
- txfifosz = priv->dma_cap.tx_fifo_size;
-
/* Adjust for real per queue fifo size */
rxfifosz /= rx_channels_count;
txfifosz /= tx_channels_count;
@@ -5878,9 +5868,6 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
const int mtu = new_mtu;
int ret;
- if (txfifosz == 0)
- txfifosz = priv->dma_cap.tx_fifo_size;
-
txfifosz /= priv->plat->tx_queues_to_use;
if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) {
@@ -7217,6 +7204,50 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
if (priv->dma_cap.tsoen)
dev_info(priv->device, "TSO supported\n");
+ if (priv->dma_cap.number_rx_queues &&
+ priv->plat->rx_queues_to_use > priv->dma_cap.number_rx_queues) {
+ dev_warn(priv->device,
+ "Number of Rx queues (%u) exceeds dma capability\n",
+ priv->plat->rx_queues_to_use);
+ priv->plat->rx_queues_to_use = priv->dma_cap.number_rx_queues;
+ }
+ if (priv->dma_cap.number_tx_queues &&
+ priv->plat->tx_queues_to_use > priv->dma_cap.number_tx_queues) {
+ dev_warn(priv->device,
+ "Number of Tx queues (%u) exceeds dma capability\n",
+ priv->plat->tx_queues_to_use);
+ priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues;
+ }
+
+ if (!priv->plat->rx_fifo_size) {
+ if (priv->dma_cap.rx_fifo_size) {
+ priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size;
+ } else {
+ dev_err(priv->device, "Can't specify Rx FIFO size\n");
+ return -ENODEV;
+ }
+ } else if (priv->dma_cap.rx_fifo_size &&
+ priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) {
+ dev_warn(priv->device,
+ "Rx FIFO size (%u) exceeds dma capability\n",
+ priv->plat->rx_fifo_size);
+ priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size;
+ }
+ if (!priv->plat->tx_fifo_size) {
+ if (priv->dma_cap.tx_fifo_size) {
+ priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size;
+ } else {
+ dev_err(priv->device, "Can't specify Tx FIFO size\n");
+ return -ENODEV;
+ }
+ } else if (priv->dma_cap.tx_fifo_size &&
+ priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) {
+ dev_warn(priv->device,
+ "Tx FIFO size (%u) exceeds dma capability\n",
+ priv->plat->tx_fifo_size);
+ priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size;
+ }
+
priv->hw->vlan_fail_q_en =
(priv->plat->flags & STMMAC_FLAG_VLAN_FAIL_Q_EN);
priv->hw->vlan_fail_q = priv->plat->vlan_fail_q;
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index d7459866d24c..72177fea1cfb 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6086,7 +6086,7 @@ static void niu_enable_napi(struct niu *np)
int i;
for (i = 0; i < np->num_ldg; i++)
- napi_enable(&np->ldg[i].napi);
+ napi_enable_locked(&np->ldg[i].napi);
}
static void niu_disable_napi(struct niu *np)
@@ -6116,7 +6116,9 @@ static int niu_open(struct net_device *dev)
if (err)
goto out_free_channels;
+ netdev_lock(dev);
niu_enable_napi(np);
+ netdev_unlock(dev);
spin_lock_irq(&np->lock);
@@ -6521,6 +6523,7 @@ static void niu_reset_task(struct work_struct *work)
niu_reset_buffers(np);
+ netdev_lock(np->dev);
spin_lock_irqsave(&np->lock, flags);
err = niu_init_hw(np);
@@ -6531,6 +6534,7 @@ static void niu_reset_task(struct work_struct *work)
}
spin_unlock_irqrestore(&np->lock, flags);
+ netdev_unlock(np->dev);
}
static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue)
@@ -6761,7 +6765,9 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu)
niu_free_channels(np);
+ netdev_lock(dev);
niu_enable_napi(np);
+ netdev_unlock(dev);
err = niu_alloc_channels(np);
if (err)
@@ -9937,6 +9943,7 @@ static int __maybe_unused niu_resume(struct device *dev_d)
spin_lock_irqsave(&np->lock, flags);
+ netdev_lock(dev);
err = niu_init_hw(np);
if (!err) {
np->timer.expires = jiffies + HZ;
@@ -9945,6 +9952,7 @@ static int __maybe_unused niu_resume(struct device *dev_d)
}
spin_unlock_irqrestore(&np->lock, flags);
+ netdev_unlock(dev);
return err;
}
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 894911f3d560..e56ebbdd428d 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -1568,7 +1568,7 @@ static void init_registers(struct net_device *dev)
if (rp->quirks & rqMgmt)
rhine_init_cam_filter(dev);
- napi_enable(&rp->napi);
+ napi_enable_locked(&rp->napi);
iowrite16(RHINE_EVENT & 0xffff, ioaddr + IntrEnable);
@@ -1696,7 +1696,10 @@ static int rhine_open(struct net_device *dev)
rhine_power_init(dev);
rhine_chip_reset(dev);
rhine_task_enable(rp);
+
+ netdev_lock(dev);
init_registers(dev);
+ netdev_unlock(dev);
netif_dbg(rp, ifup, dev, "%s() Done - status %04x MII status: %04x\n",
__func__, ioread16(ioaddr + ChipCmd),
@@ -1727,6 +1730,8 @@ static void rhine_reset_task(struct work_struct *work)
napi_disable(&rp->napi);
netif_tx_disable(dev);
+
+ netdev_lock(dev);
spin_lock_bh(&rp->lock);
/* clear all descriptors */
@@ -1740,6 +1745,7 @@ static void rhine_reset_task(struct work_struct *work)
init_registers(dev);
spin_unlock_bh(&rp->lock);
+ netdev_unlock(dev);
netif_trans_update(dev); /* prevent tx timeout */
dev->stats.tx_errors++;
@@ -2541,9 +2547,12 @@ static int rhine_resume(struct device *device)
alloc_tbufs(dev);
rhine_reset_rbufs(rp);
rhine_task_enable(rp);
+
+ netdev_lock(dev);
spin_lock_bh(&rp->lock);
init_registers(dev);
spin_unlock_bh(&rp->lock);
+ netdev_unlock(dev);
netif_device_attach(dev);
diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c
index 3b23f3d3ca2b..5c80fbee7913 100644
--- a/drivers/net/netdevsim/ethtool.c
+++ b/drivers/net/netdevsim/ethtool.c
@@ -74,7 +74,7 @@ static void nsim_get_ringparam(struct net_device *dev,
memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring));
kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX;
- if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
+ if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
}
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index dcf073bc4802..96d54c08043d 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -134,6 +134,7 @@ struct netdevsim {
u32 sleep;
u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];
u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS];
+ struct dentry *ddir;
struct debugfs_u32_array dfs_ports[2];
} udp_ports;
diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c
index 02dc3123eb6c..640b4983a9a0 100644
--- a/drivers/net/netdevsim/udp_tunnels.c
+++ b/drivers/net/netdevsim/udp_tunnels.c
@@ -112,9 +112,11 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data,
struct net_device *dev = file->private_data;
struct netdevsim *ns = netdev_priv(dev);
- memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports));
rtnl_lock();
- udp_tunnel_nic_reset_ntf(dev);
+ if (dev->reg_state == NETREG_REGISTERED) {
+ memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports));
+ udp_tunnel_nic_reset_ntf(dev);
+ }
rtnl_unlock();
return count;
@@ -144,23 +146,23 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
else
ns->udp_ports.ports = nsim_dev->udp_ports.__ports;
- debugfs_create_u32("udp_ports_inject_error", 0600,
- ns->nsim_dev_port->ddir,
+ ns->udp_ports.ddir = debugfs_create_dir("udp_ports",
+ ns->nsim_dev_port->ddir);
+
+ debugfs_create_u32("inject_error", 0600, ns->udp_ports.ddir,
&ns->udp_ports.inject_error);
ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0];
ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS;
- debugfs_create_u32_array("udp_ports_table0", 0400,
- ns->nsim_dev_port->ddir,
+ debugfs_create_u32_array("table0", 0400, ns->udp_ports.ddir,
&ns->udp_ports.dfs_ports[0]);
ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1];
ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS;
- debugfs_create_u32_array("udp_ports_table1", 0400,
- ns->nsim_dev_port->ddir,
+ debugfs_create_u32_array("table1", 0400, ns->udp_ports.ddir,
&ns->udp_ports.dfs_ports[1]);
- debugfs_create_file("udp_ports_reset", 0200, ns->nsim_dev_port->ddir,
+ debugfs_create_file("reset", 0200, ns->udp_ports.ddir,
dev, &nsim_udp_tunnels_info_reset_fops);
/* Note: it's not normal to allocate the info struct like this!
@@ -196,6 +198,9 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
void nsim_udp_tunnels_info_destroy(struct net_device *dev)
{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ debugfs_remove_recursive(ns->udp_ports.ddir);
kfree(dev->udp_tunnel_nic_info);
dev->udp_tunnel_nic_info = NULL;
}
diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c
index 4494b3e39ce2..a3996471a1c9 100644
--- a/drivers/net/phy/marvell-88q2xxx.c
+++ b/drivers/net/phy/marvell-88q2xxx.c
@@ -95,6 +95,10 @@
#define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246
+struct mv88q2xxx_priv {
+ bool enable_temp;
+};
+
struct mmd_val {
int devad;
u32 regnum;
@@ -710,17 +714,12 @@ static const struct hwmon_chip_info mv88q2xxx_hwmon_chip_info = {
static int mv88q2xxx_hwmon_probe(struct phy_device *phydev)
{
+ struct mv88q2xxx_priv *priv = phydev->priv;
struct device *dev = &phydev->mdio.dev;
struct device *hwmon;
char *hwmon_name;
- int ret;
-
- /* Enable temperature sense */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TEMP_SENSOR2,
- MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0);
- if (ret < 0)
- return ret;
+ priv->enable_temp = true;
hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev));
if (IS_ERR(hwmon_name))
return PTR_ERR(hwmon_name);
@@ -743,6 +742,14 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev)
static int mv88q2xxx_probe(struct phy_device *phydev)
{
+ struct mv88q2xxx_priv *priv;
+
+ priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
return mv88q2xxx_hwmon_probe(phydev);
}
@@ -810,6 +817,18 @@ static int mv88q222x_revb1_revb2_config_init(struct phy_device *phydev)
static int mv88q222x_config_init(struct phy_device *phydev)
{
+ struct mv88q2xxx_priv *priv = phydev->priv;
+ int ret;
+
+ /* Enable temperature sense */
+ if (priv->enable_temp) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR2,
+ MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0);
+ if (ret < 0)
+ return ret;
+ }
+
if (phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB0)
return mv88q222x_revb0_config_init(phydev);
else
diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 323717a4821f..34231b5b9175 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -1297,6 +1297,8 @@ static int nxp_c45_soft_reset(struct phy_device *phydev)
if (ret)
return ret;
+ usleep_range(2000, 2050);
+
return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
VEND1_DEVICE_CONTROL, ret,
!(ret & DEVICE_CONTROL_RESET), 20000,
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 46afb95ffabe..a19789b57190 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -61,7 +61,18 @@
#define IPHETH_USBINTF_PROTO 1
#define IPHETH_IP_ALIGN 2 /* padding at front of URB */
-#define IPHETH_NCM_HEADER_SIZE (12 + 96) /* NCMH + NCM0 */
+/* On iOS devices, NCM headers in RX have a fixed size regardless of DPE count:
+ * - NTH16 (NCMH): 12 bytes, as per CDC NCM 1.0 spec
+ * - NDP16 (NCM0): 96 bytes, of which
+ * - NDP16 fixed header: 8 bytes
+ * - maximum of 22 DPEs (21 datagrams + trailer), 4 bytes each
+ */
+#define IPHETH_NDP16_MAX_DPE 22
+#define IPHETH_NDP16_HEADER_SIZE (sizeof(struct usb_cdc_ncm_ndp16) + \
+ IPHETH_NDP16_MAX_DPE * \
+ sizeof(struct usb_cdc_ncm_dpe16))
+#define IPHETH_NCM_HEADER_SIZE (sizeof(struct usb_cdc_ncm_nth16) + \
+ IPHETH_NDP16_HEADER_SIZE)
#define IPHETH_TX_BUF_SIZE ETH_FRAME_LEN
#define IPHETH_RX_BUF_SIZE_LEGACY (IPHETH_IP_ALIGN + ETH_FRAME_LEN)
#define IPHETH_RX_BUF_SIZE_NCM 65536
@@ -207,15 +218,23 @@ static int ipheth_rcvbulk_callback_legacy(struct urb *urb)
return ipheth_consume_skb(buf, len, dev);
}
+/* In "NCM mode", the iOS device encapsulates RX (phone->computer) traffic
+ * in NCM Transfer Blocks (similarly to CDC NCM). However, unlike reverse
+ * tethering (handled by the `cdc_ncm` driver), regular tethering is not
+ * compliant with the CDC NCM spec, as the device is missing the necessary
+ * descriptors, and TX (computer->phone) traffic is not encapsulated
+ * at all. Thus `ipheth` implements a very limited subset of the spec with
+ * the sole purpose of parsing RX URBs.
+ */
static int ipheth_rcvbulk_callback_ncm(struct urb *urb)
{
struct usb_cdc_ncm_nth16 *ncmh;
struct usb_cdc_ncm_ndp16 *ncm0;
struct usb_cdc_ncm_dpe16 *dpe;
struct ipheth_device *dev;
+ u16 dg_idx, dg_len;
int retval = -EINVAL;
char *buf;
- int len;
dev = urb->context;
@@ -226,40 +245,42 @@ static int ipheth_rcvbulk_callback_ncm(struct urb *urb)
ncmh = urb->transfer_buffer;
if (ncmh->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN) ||
- le16_to_cpu(ncmh->wNdpIndex) >= urb->actual_length) {
- dev->net->stats.rx_errors++;
- return retval;
- }
+ /* On iOS, NDP16 directly follows NTH16 */
+ ncmh->wNdpIndex != cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)))
+ goto rx_error;
- ncm0 = urb->transfer_buffer + le16_to_cpu(ncmh->wNdpIndex);
- if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN) ||
- le16_to_cpu(ncmh->wHeaderLength) + le16_to_cpu(ncm0->wLength) >=
- urb->actual_length) {
- dev->net->stats.rx_errors++;
- return retval;
- }
+ ncm0 = urb->transfer_buffer + sizeof(struct usb_cdc_ncm_nth16);
+ if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN))
+ goto rx_error;
dpe = ncm0->dpe16;
- while (le16_to_cpu(dpe->wDatagramIndex) != 0 &&
- le16_to_cpu(dpe->wDatagramLength) != 0) {
- if (le16_to_cpu(dpe->wDatagramIndex) >= urb->actual_length ||
- le16_to_cpu(dpe->wDatagramIndex) +
- le16_to_cpu(dpe->wDatagramLength) > urb->actual_length) {
+ for (int dpe_i = 0; dpe_i < IPHETH_NDP16_MAX_DPE; ++dpe_i, ++dpe) {
+ dg_idx = le16_to_cpu(dpe->wDatagramIndex);
+ dg_len = le16_to_cpu(dpe->wDatagramLength);
+
+ /* Null DPE must be present after last datagram pointer entry
+ * (3.3.1 USB CDC NCM spec v1.0)
+ */
+ if (dg_idx == 0 && dg_len == 0)
+ return 0;
+
+ if (dg_idx < IPHETH_NCM_HEADER_SIZE ||
+ dg_idx >= urb->actual_length ||
+ dg_len > urb->actual_length - dg_idx) {
dev->net->stats.rx_length_errors++;
return retval;
}
- buf = urb->transfer_buffer + le16_to_cpu(dpe->wDatagramIndex);
- len = le16_to_cpu(dpe->wDatagramLength);
+ buf = urb->transfer_buffer + dg_idx;
- retval = ipheth_consume_skb(buf, len, dev);
+ retval = ipheth_consume_skb(buf, dg_len, dev);
if (retval != 0)
return retval;
-
- dpe++;
}
- return 0;
+rx_error:
+ dev->net->stats.rx_errors++;
+ return retval;
}
static void ipheth_rcvbulk_callback(struct urb *urb)
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 01a3b2417a54..ddff6f19ff98 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -71,6 +71,14 @@
#define MSR_SPEED (1<<3)
#define MSR_LINK (1<<2)
+/* USB endpoints */
+enum rtl8150_usb_ep {
+ RTL8150_USB_EP_CONTROL = 0,
+ RTL8150_USB_EP_BULK_IN = 1,
+ RTL8150_USB_EP_BULK_OUT = 2,
+ RTL8150_USB_EP_INT_IN = 3,
+};
+
/* Interrupt pipe data */
#define INT_TSR 0x00
#define INT_RSR 0x01
@@ -867,6 +875,13 @@ static int rtl8150_probe(struct usb_interface *intf,
struct usb_device *udev = interface_to_usbdev(intf);
rtl8150_t *dev;
struct net_device *netdev;
+ static const u8 bulk_ep_addr[] = {
+ RTL8150_USB_EP_BULK_IN | USB_DIR_IN,
+ RTL8150_USB_EP_BULK_OUT | USB_DIR_OUT,
+ 0};
+ static const u8 int_ep_addr[] = {
+ RTL8150_USB_EP_INT_IN | USB_DIR_IN,
+ 0};
netdev = alloc_etherdev(sizeof(rtl8150_t));
if (!netdev)
@@ -880,6 +895,13 @@ static int rtl8150_probe(struct usb_interface *intf,
return -ENOMEM;
}
+ /* Verify that all required endpoints are present */
+ if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) ||
+ !usb_check_int_endpoints(intf, int_ep_addr)) {
+ dev_err(&intf->dev, "couldn't find required endpoints\n");
+ goto out;
+ }
+
tasklet_setup(&dev->tl, rx_fixup);
spin_lock_init(&dev->rx_pool_lock);
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index d2023e7131bd..6e6e9f05509a 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -411,6 +411,11 @@ static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb
struct tunnel_msg *tmsg;
struct net_device *dev;
+ if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) {
+ NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
+ return -EINVAL;
+ }
+
tmsg = nlmsg_data(cb->nlh);
if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index a259f4dd9540..413973d05b43 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1479,14 +1479,13 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev)
tasklet_enable(&dev->mt76.pre_tbtt_tasklet);
mt7603_beacon_set_timer(dev, -1, beacon_int);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
- napi_schedule(&dev->mt76.tx_napi);
-
napi_enable(&dev->mt76.napi[0]);
- napi_schedule(&dev->mt76.napi[0]);
-
napi_enable(&dev->mt76.napi[1]);
+
+ local_bh_disable();
+ napi_schedule(&dev->mt76.tx_napi);
+ napi_schedule(&dev->mt76.napi[0]);
napi_schedule(&dev->mt76.napi[1]);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index 9a278589df4e..68010e27f065 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -164,12 +164,16 @@ static int mt7615_pci_resume(struct pci_dev *pdev)
dev_err(mdev->dev, "PDMA engine must be reinitialized\n");
mt76_worker_enable(&mdev->tx_worker);
- local_bh_disable();
+
mt76_for_each_q_rx(mdev, i) {
napi_enable(&mdev->napi[i]);
- napi_schedule(&mdev->napi[i]);
}
napi_enable(&mdev->tx_napi);
+
+ local_bh_disable();
+ mt76_for_each_q_rx(mdev, i) {
+ napi_schedule(&mdev->napi[i]);
+ }
napi_schedule(&mdev->tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index a0ca3bbdfcaf..c2e4e6aabd9f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -262,12 +262,14 @@ void mt7615_mac_reset_work(struct work_struct *work)
mt76_worker_enable(&dev->mt76.tx_worker);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
- napi_schedule(&dev->mt76.tx_napi);
-
mt76_for_each_q_rx(&dev->mt76, i) {
napi_enable(&dev->mt76.napi[i]);
+ }
+
+ local_bh_disable();
+ napi_schedule(&dev->mt76.tx_napi);
+ mt76_for_each_q_rx(&dev->mt76, i) {
napi_schedule(&dev->mt76.napi[i]);
}
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
index 1eb955f3ca13..b456ccd00d58 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -282,14 +282,16 @@ static int mt76x0e_resume(struct pci_dev *pdev)
mt76_worker_enable(&mdev->tx_worker);
- local_bh_disable();
mt76_for_each_q_rx(mdev, i) {
mt76_queue_rx_reset(dev, i);
napi_enable(&mdev->napi[i]);
- napi_schedule(&mdev->napi[i]);
}
-
napi_enable(&mdev->tx_napi);
+
+ local_bh_disable();
+ mt76_for_each_q_rx(mdev, i) {
+ napi_schedule(&mdev->napi[i]);
+ }
napi_schedule(&mdev->tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 7d840ad4ae65..a82c75ba26e6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -504,12 +504,14 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
mt76_worker_enable(&dev->mt76.tx_worker);
tasklet_enable(&dev->mt76.pre_tbtt_tasklet);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
- napi_schedule(&dev->mt76.tx_napi);
-
mt76_for_each_q_rx(&dev->mt76, i) {
napi_enable(&dev->mt76.napi[i]);
+ }
+
+ local_bh_disable();
+ napi_schedule(&dev->mt76.tx_napi);
+ mt76_for_each_q_rx(&dev->mt76, i) {
napi_schedule(&dev->mt76.napi[i]);
}
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index 67c9d1caa0bd..727bfdd00b40 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -151,12 +151,15 @@ mt76x2e_resume(struct pci_dev *pdev)
mt76_worker_enable(&mdev->tx_worker);
- local_bh_disable();
mt76_for_each_q_rx(mdev, i) {
napi_enable(&mdev->napi[i]);
- napi_schedule(&mdev->napi[i]);
}
napi_enable(&mdev->tx_napi);
+
+ local_bh_disable();
+ mt76_for_each_q_rx(mdev, i) {
+ napi_schedule(&mdev->napi[i]);
+ }
napi_schedule(&mdev->tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 13bdc0a7174c..2ba6eb3038ce 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1356,10 +1356,15 @@ mt7915_mac_restart(struct mt7915_dev *dev)
mt7915_dma_reset(dev, true);
- local_bh_disable();
mt76_for_each_q_rx(mdev, i) {
if (mdev->q_rx[i].ndesc) {
napi_enable(&dev->mt76.napi[i]);
+ }
+ }
+
+ local_bh_disable();
+ mt76_for_each_q_rx(mdev, i) {
+ if (mdev->q_rx[i].ndesc) {
napi_schedule(&dev->mt76.napi[i]);
}
}
@@ -1419,8 +1424,9 @@ out:
if (phy2)
clear_bit(MT76_RESET, &phy2->mt76->state);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
+
+ local_bh_disable();
napi_schedule(&dev->mt76.tx_napi);
local_bh_enable();
@@ -1570,9 +1576,12 @@ void mt7915_mac_reset_work(struct work_struct *work)
if (phy2)
clear_bit(MT76_RESET, &phy2->mt76->state);
- local_bh_disable();
mt76_for_each_q_rx(&dev->mt76, i) {
napi_enable(&dev->mt76.napi[i]);
+ }
+
+ local_bh_disable();
+ mt76_for_each_q_rx(&dev->mt76, i) {
napi_schedule(&dev->mt76.napi[i]);
}
local_bh_enable();
@@ -1581,8 +1590,8 @@ void mt7915_mac_reset_work(struct work_struct *work)
mt76_worker_enable(&dev->mt76.tx_worker);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
+ local_bh_disable();
napi_schedule(&dev->mt76.tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index ba870e1b05fb..a0c9df3c2cc7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -523,12 +523,15 @@ static int mt7921_pci_resume(struct device *device)
mt76_worker_enable(&mdev->tx_worker);
- local_bh_disable();
mt76_for_each_q_rx(mdev, i) {
napi_enable(&mdev->napi[i]);
- napi_schedule(&mdev->napi[i]);
}
napi_enable(&mdev->tx_napi);
+
+ local_bh_disable();
+ mt76_for_each_q_rx(mdev, i) {
+ napi_schedule(&mdev->napi[i]);
+ }
napi_schedule(&mdev->tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c
index 2452b1a2d118..881812ba03ff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c
@@ -81,9 +81,12 @@ int mt7921e_mac_reset(struct mt792x_dev *dev)
mt792x_wpdma_reset(dev, true);
- local_bh_disable();
mt76_for_each_q_rx(&dev->mt76, i) {
napi_enable(&dev->mt76.napi[i]);
+ }
+
+ local_bh_disable();
+ mt76_for_each_q_rx(&dev->mt76, i) {
napi_schedule(&dev->mt76.napi[i]);
}
local_bh_enable();
@@ -115,8 +118,8 @@ int mt7921e_mac_reset(struct mt792x_dev *dev)
err = __mt7921_start(&dev->phy);
out:
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
+ local_bh_disable();
napi_schedule(&dev->mt76.tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
index f36893e20c61..c7b5dc1dbb34 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
@@ -556,12 +556,15 @@ static int mt7925_pci_resume(struct device *device)
mt76_worker_enable(&mdev->tx_worker);
- local_bh_disable();
mt76_for_each_q_rx(mdev, i) {
napi_enable(&mdev->napi[i]);
- napi_schedule(&mdev->napi[i]);
}
napi_enable(&mdev->tx_napi);
+
+ local_bh_disable();
+ mt76_for_each_q_rx(mdev, i) {
+ napi_schedule(&mdev->napi[i]);
+ }
napi_schedule(&mdev->tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c
index faedbf766d1a..4578d16bf456 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c
@@ -101,12 +101,15 @@ int mt7925e_mac_reset(struct mt792x_dev *dev)
mt792x_wpdma_reset(dev, true);
- local_bh_disable();
mt76_for_each_q_rx(&dev->mt76, i) {
napi_enable(&dev->mt76.napi[i]);
- napi_schedule(&dev->mt76.napi[i]);
}
napi_enable(&dev->mt76.tx_napi);
+
+ local_bh_disable();
+ mt76_for_each_q_rx(&dev->mt76, i) {
+ napi_schedule(&dev->mt76.napi[i]);
+ }
napi_schedule(&dev->mt76.tx_napi);
local_bh_enable();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index bc8cba4dca47..019c925ae600 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -1695,7 +1695,6 @@ mt7996_mac_restart(struct mt7996_dev *dev)
mt7996_dma_reset(dev, true);
- local_bh_disable();
mt76_for_each_q_rx(mdev, i) {
if (mtk_wed_device_active(&dev->mt76.mmio.wed) &&
mt76_queue_is_wed_rro(&mdev->q_rx[i]))
@@ -1703,10 +1702,11 @@ mt7996_mac_restart(struct mt7996_dev *dev)
if (mdev->q_rx[i].ndesc) {
napi_enable(&dev->mt76.napi[i]);
+ local_bh_disable();
napi_schedule(&dev->mt76.napi[i]);
+ local_bh_enable();
}
}
- local_bh_enable();
clear_bit(MT76_MCU_RESET, &dev->mphy.state);
clear_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
@@ -1764,8 +1764,8 @@ out:
if (phy3)
clear_bit(MT76_RESET, &phy3->mt76->state);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
+ local_bh_disable();
napi_schedule(&dev->mt76.tx_napi);
local_bh_enable();
@@ -1958,23 +1958,23 @@ void mt7996_mac_reset_work(struct work_struct *work)
if (phy3)
clear_bit(MT76_RESET, &phy3->mt76->state);
- local_bh_disable();
mt76_for_each_q_rx(&dev->mt76, i) {
if (mtk_wed_device_active(&dev->mt76.mmio.wed) &&
mt76_queue_is_wed_rro(&dev->mt76.q_rx[i]))
continue;
napi_enable(&dev->mt76.napi[i]);
+ local_bh_disable();
napi_schedule(&dev->mt76.napi[i]);
+ local_bh_enable();
}
- local_bh_enable();
tasklet_schedule(&dev->mt76.irq_tasklet);
mt76_worker_enable(&dev->mt76.tx_worker);
- local_bh_disable();
napi_enable(&dev->mt76.tx_napi);
+ local_bh_disable();
napi_schedule(&dev->mt76.tx_napi);
local_bh_enable();
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index ea96a14d72d1..bf6468c56419 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -4,6 +4,7 @@
*
* Copyright (C) 2010 OMICRON electronics GmbH
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/posix-clock.h>
#include <linux/poll.h>
@@ -176,6 +177,9 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd,
struct timespec64 ts;
int enable, err = 0;
+ if (in_compat_syscall() && cmd != PTP_ENABLE_PPS && cmd != PTP_ENABLE_PPS2)
+ arg = (unsigned long)compat_ptr(arg);
+
tsevq = pccontext->private_clkdata;
switch (cmd) {
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index b932425ddc6a..35a5994bf64f 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -217,6 +217,11 @@ static int ptp_getcycles64(struct ptp_clock_info *info, struct timespec64 *ts)
return info->gettime64(info, ts);
}
+static int ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on)
+{
+ return -EOPNOTSUPP;
+}
+
static void ptp_aux_kworker(struct kthread_work *work)
{
struct ptp_clock *ptp = container_of(work, struct ptp_clock,
@@ -294,6 +299,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
ptp->info->getcrosscycles = ptp->info->getcrosststamp;
}
+ if (!ptp->info->enable)
+ ptp->info->enable = ptp_enable;
+
if (ptp->info->do_aux_work) {
kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
ptp->kworker = kthread_run_worker(0, "ptp%d", ptp->index);
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index fbffd451031f..45bd001206a2 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -245,7 +245,6 @@ static void sclp_request_timeout(bool force_restart);
static void sclp_process_queue(void);
static void __sclp_make_read_req(void);
static int sclp_init_mask(int calculate);
-static int sclp_init(void);
static void
__sclp_queue_read_req(void)
@@ -1251,8 +1250,7 @@ static struct platform_driver sclp_pdrv = {
/* Initialize SCLP driver. Return zero if driver is operational, non-zero
* otherwise. */
-static int
-sclp_init(void)
+int sclp_init(void)
{
unsigned long flags;
int rc = 0;
@@ -1305,13 +1303,7 @@ fail_unlock:
static __init int sclp_initcall(void)
{
- int rc;
-
- rc = platform_driver_register(&sclp_pdrv);
- if (rc)
- return rc;
-
- return sclp_init();
+ return platform_driver_register(&sclp_pdrv);
}
arch_initcall(sclp_initcall);
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 3dd50ac9c5b0..b2d93a6e36c4 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -123,7 +123,7 @@ static DECLARE_WAIT_QUEUE_HEAD(read_wait_queue);
*/
static struct vmlogrdr_priv_t sys_ser[] = {
- { .system_service = "*LOGREC ",
+ { .system_service = { '*', 'L', 'O', 'G', 'R', 'E', 'C', ' ' },
.internal_name = "logrec",
.recording_name = "EREP",
.minor_num = 0,
@@ -132,7 +132,7 @@ static struct vmlogrdr_priv_t sys_ser[] = {
.autorecording = 1,
.autopurge = 1,
},
- { .system_service = "*ACCOUNT",
+ { .system_service = { '*', 'A', 'C', 'C', 'O', 'U', 'N', 'T' },
.internal_name = "account",
.recording_name = "ACCOUNT",
.minor_num = 1,
@@ -141,7 +141,7 @@ static struct vmlogrdr_priv_t sys_ser[] = {
.autorecording = 1,
.autopurge = 1,
},
- { .system_service = "*SYMPTOM",
+ { .system_service = { '*', 'S', 'Y', 'M', 'P', 'T', 'O', 'M' },
.internal_name = "symptom",
.recording_name = "SYMPTOM",
.minor_num = 2,
@@ -356,7 +356,7 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp)
if (connect_rc) {
pr_err("vmlogrdr: iucv connection to %s "
"failed with rc %i \n",
- logptr->system_service, connect_rc);
+ logptr->internal_name, connect_rc);
goto out_path;
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 698c43dd5dc8..f28bc763847a 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -202,7 +202,7 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
return inode->i_sb->s_fs_info;
}
-static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry)
+static inline struct v9fs_session_info *v9fs_dentry2v9ses(const struct dentry *dentry)
{
return dentry->d_sb->s_fs_info;
}
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 01338d4c2d9e..5061f192eafd 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -61,7 +61,7 @@ static void v9fs_dentry_release(struct dentry *dentry)
p9_fid_put(hlist_entry(p, struct p9_fid, dlist));
}
-static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
struct p9_fid *fid;
struct inode *inode;
@@ -99,14 +99,36 @@ out_valid:
return 1;
}
+static int v9fs_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
+{
+ return __v9fs_lookup_revalidate(dentry, flags);
+}
+
+static bool v9fs_dentry_unalias_trylock(const struct dentry *dentry)
+{
+ struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
+ return down_write_trylock(&v9ses->rename_sem);
+}
+
+static void v9fs_dentry_unalias_unlock(const struct dentry *dentry)
+{
+ struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
+ up_write(&v9ses->rename_sem);
+}
+
const struct dentry_operations v9fs_cached_dentry_operations = {
.d_revalidate = v9fs_lookup_revalidate,
- .d_weak_revalidate = v9fs_lookup_revalidate,
+ .d_weak_revalidate = __v9fs_lookup_revalidate,
.d_delete = v9fs_cached_dentry_delete,
.d_release = v9fs_dentry_release,
+ .d_unalias_trylock = v9fs_dentry_unalias_trylock,
+ .d_unalias_unlock = v9fs_dentry_unalias_unlock,
};
const struct dentry_operations v9fs_dentry_operations = {
.d_delete = always_delete_dentry,
.d_release = v9fs_dentry_release,
+ .d_unalias_trylock = v9fs_dentry_unalias_trylock,
+ .d_unalias_unlock = v9fs_dentry_unalias_unlock,
};
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a843c36fc471..02cbf38e1a77 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -23,7 +23,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
static int afs_dir_open(struct inode *inode, struct file *file);
static int afs_readdir(struct file *file, struct dir_context *ctx);
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
+static int afs_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags);
static int afs_d_delete(const struct dentry *dentry);
static void afs_d_iput(struct dentry *dentry, struct inode *inode);
static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
@@ -597,19 +598,19 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
* Do a lookup of a single name in a directory
* - just returns the FID the dentry name maps to if found
*/
-static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
+static int afs_do_lookup_one(struct inode *dir, const struct qstr *name,
struct afs_fid *fid,
afs_dataversion_t *_dir_version)
{
struct afs_super_info *as = dir->i_sb->s_fs_info;
struct afs_lookup_one_cookie cookie = {
.ctx.actor = afs_lookup_one_filldir,
- .name = dentry->d_name,
+ .name = *name,
.fid.vid = as->volume->vid
};
int ret;
- _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+ _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name);
/* search the directory */
ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
@@ -1023,21 +1024,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
/*
* Check the validity of a dentry under RCU conditions.
*/
-static int afs_d_revalidate_rcu(struct dentry *dentry)
+static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry)
{
- struct afs_vnode *dvnode;
- struct dentry *parent;
- struct inode *dir;
long dir_version, de_version;
_enter("%p", dentry);
- /* Check the parent directory is still valid first. */
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- dvnode = AFS_FS_I(dir);
if (test_bit(AFS_VNODE_DELETED, &dvnode->flags))
return -ECHILD;
@@ -1065,11 +1057,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry)
* - NOTE! the hit can be a negative hit too, so we can't assume we have an
* inode
*/
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- struct afs_vnode *vnode, *dir;
+ struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir);
struct afs_fid fid;
- struct dentry *parent;
struct inode *inode;
struct key *key;
afs_dataversion_t dir_version, invalid_before;
@@ -1077,7 +1069,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
int ret;
if (flags & LOOKUP_RCU)
- return afs_d_revalidate_rcu(dentry);
+ return afs_d_revalidate_rcu(dir, dentry);
if (d_really_is_positive(dentry)) {
vnode = AFS_FS_I(d_inode(dentry));
@@ -1092,14 +1084,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (IS_ERR(key))
key = NULL;
- /* Hold the parent dentry so we can peer at it */
- parent = dget_parent(dentry);
- dir = AFS_FS_I(d_inode(parent));
-
/* validate the parent directory */
ret = afs_validate(dir, key);
if (ret == -ERESTARTSYS) {
- dput(parent);
key_put(key);
return ret;
}
@@ -1127,7 +1114,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
afs_stat_v(dir, n_reval);
/* search the directory for this vnode */
- ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, &dir_version);
+ ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version);
switch (ret) {
case 0:
/* the filename maps to something */
@@ -1171,22 +1158,19 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out_valid;
default:
- _debug("failed to iterate dir %pd: %d",
- parent, ret);
+ _debug("failed to iterate parent %pd2: %d", dentry, ret);
goto not_found;
}
out_valid:
dentry->d_fsdata = (void *)(unsigned long)dir_version;
out_valid_noupdate:
- dput(parent);
key_put(key);
_leave(" = 1 [valid]");
return 1;
not_found:
_debug("dropping dentry %pd2", dentry);
- dput(parent);
key_put(key);
_leave(" = 0 [bad]");
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 672ca2c1d37d..ca755e8d1a37 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -24,7 +24,10 @@ do { \
} while (0)
const char * const bch2_btree_node_flags[] = {
-#define x(f) #f,
+ "typebit",
+ "typebit",
+ "typebit",
+#define x(f) [BTREE_NODE_##f] = #f,
BTREE_FLAGS()
#undef x
NULL
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 367231ab1980..5988219c6908 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2239,8 +2239,6 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
if (unlikely(ret))
return bkey_s_c_err(ret);
- btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
-
k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
if (!k.k)
return k;
@@ -2251,6 +2249,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
iter->k = u;
k.k = &iter->k;
+ btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
return k;
}
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 3b62296c3100..c378b97ebeca 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -291,8 +291,10 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
unsigned flags)
{
- if (flags & BTREE_ITER_cached_nofill)
+ if (flags & BTREE_ITER_cached_nofill) {
+ ck_path->l[0].b = NULL;
return 0;
+ }
struct bch_fs *c = trans->c;
struct btree_iter iter;
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 6b79b672e0b1..2760dd9569ed 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
- trans->journal_u64s, flags);
+ trans->journal_u64s, flags, trans);
}
#define JSET_ENTRY_LOG_U64s 4
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index f99ff1819597..114bf2f3879f 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -4,6 +4,7 @@
#include "compress.h"
#include "error.h"
#include "extents.h"
+#include "io_write.h"
#include "opts.h"
#include "super-io.h"
@@ -254,11 +255,14 @@ err:
goto out;
}
-int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
- struct bch_extent_crc_unpacked *crc)
+int bch2_bio_uncompress_inplace(struct bch_write_op *op,
+ struct bio *bio)
{
+ struct bch_fs *c = op->c;
+ struct bch_extent_crc_unpacked *crc = &op->crc;
struct bbuf data = { NULL };
size_t dst_len = crc->uncompressed_size << 9;
+ int ret = 0;
/* bio must own its pages: */
BUG_ON(!bio->bi_vcnt);
@@ -266,17 +270,26 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
- bch_err(c, "error rewriting existing data: extent too big");
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error(&buf, op);
+ prt_printf(&buf, "error rewriting existing data: extent too big");
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
return -EIO;
}
data = __bounce_alloc(c, dst_len, WRITE);
if (__bio_uncompress(c, bio, data.b, *crc)) {
- if (!c->opts.no_data_io)
- bch_err(c, "error rewriting existing data: decompression error");
- bio_unmap_or_unbounce(c, data);
- return -EIO;
+ if (!c->opts.no_data_io) {
+ struct printbuf buf = PRINTBUF;
+ bch2_write_op_error(&buf, op);
+ prt_printf(&buf, "error rewriting existing data: decompression error");
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+ ret = -EIO;
+ goto err;
}
/*
@@ -293,9 +306,9 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
crc->uncompressed_size = crc->live_size;
crc->offset = 0;
crc->csum = (struct bch_csum) { 0, 0 };
-
+err:
bio_unmap_or_unbounce(c, data);
- return 0;
+ return ret;
}
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
index 607fd5e232c9..bec2f05bfd52 100644
--- a/fs/bcachefs/compress.h
+++ b/fs/bcachefs/compress.h
@@ -47,8 +47,8 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
-int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
- struct bch_extent_crc_unpacked *);
+struct bch_write_op;
+int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
struct bvec_iter, struct bch_extent_crc_unpacked);
unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 585214931e05..337494facac6 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -91,15 +91,28 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
return true;
}
-static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
+static noinline void trace_move_extent_finish2(struct data_update *u,
+ struct bkey_i *new,
+ struct bkey_i *insert)
{
- if (trace_move_extent_finish_enabled()) {
- struct printbuf buf = PRINTBUF;
+ struct bch_fs *c = u->op.c;
+ struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, k);
- trace_move_extent_finish(c, buf.buf);
- printbuf_exit(&buf);
- }
+ prt_newline(&buf);
+
+ bch2_data_update_to_text(&buf, u);
+ prt_newline(&buf);
+
+ prt_str_indented(&buf, "new replicas:\t");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
+ prt_newline(&buf);
+
+ prt_str_indented(&buf, "insert:\t");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+ prt_newline(&buf);
+
+ trace_move_extent_finish(c, buf.buf);
+ printbuf_exit(&buf);
}
static void trace_move_extent_fail2(struct data_update *m,
@@ -372,7 +385,8 @@ restart_drop_extra_replicas:
bch2_btree_iter_set_pos(&iter, next_pos);
this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
- trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i));
+ if (trace_move_extent_finish_enabled())
+ trace_move_extent_finish2(m, &new->k_i, insert);
}
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -525,34 +539,38 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
struct data_update_opts *data_opts)
{
printbuf_tabstop_push(out, 20);
- prt_str(out, "rewrite ptrs:\t");
+
+ prt_str_indented(out, "rewrite ptrs:\t");
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
prt_newline(out);
- prt_str(out, "kill ptrs:\t");
+ prt_str_indented(out, "kill ptrs:\t");
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
prt_newline(out);
- prt_str(out, "target:\t");
+ prt_str_indented(out, "target:\t");
bch2_target_to_text(out, c, data_opts->target);
prt_newline(out);
- prt_str(out, "compression:\t");
+ prt_str_indented(out, "compression:\t");
bch2_compression_opt_to_text(out, io_opts->background_compression);
prt_newline(out);
- prt_str(out, "opts.replicas:\t");
+ prt_str_indented(out, "opts.replicas:\t");
prt_u64(out, io_opts->data_replicas);
+ prt_newline(out);
- prt_str(out, "extra replicas:\t");
+ prt_str_indented(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
}
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
{
- bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
- prt_newline(out);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
+ prt_newline(out);
+
+ prt_str_indented(out, "old key:\t");
+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index b5de52a50d10..55333e82d1fe 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -20,6 +20,7 @@
#include "extents.h"
#include "fsck.h"
#include "inode.h"
+#include "journal_reclaim.h"
#include "super.h"
#include <linux/console.h>
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 3e71860f66b9..dd508d93e9fc 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -406,7 +406,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
}
-static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
{
__bch2_write_op_error(out, op, op->pos.offset);
}
@@ -873,7 +873,7 @@ static enum prep_encoded_ret {
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return PREP_ENCODED_CHECKSUM_ERR;
- if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
+ if (bch2_bio_uncompress_inplace(op, bio))
return PREP_ENCODED_ERR;
}
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index 5400ce94ee57..b4626013abc8 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -20,6 +20,8 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);
+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);
+
#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 2cd20114b74b..cb2c3722f674 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -113,11 +113,10 @@ journal_seq_to_buf(struct journal *j, u64 seq)
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
- unsigned i;
-
- for (i = 0; i < ARRAY_SIZE(p->list); i++)
- INIT_LIST_HEAD(&p->list[i]);
- INIT_LIST_HEAD(&p->flushed);
+ for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++)
+ INIT_LIST_HEAD(&p->unflushed[i]);
+ for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++)
+ INIT_LIST_HEAD(&p->flushed[i]);
atomic_set(&p->count, count);
p->devs.nr = 0;
}
@@ -601,6 +600,16 @@ out:
: -BCH_ERR_journal_res_get_blocked;
}
+static unsigned max_dev_latency(struct bch_fs *c)
+{
+ u64 nsecs = 0;
+
+ for_each_rw_member(c, ca)
+ nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
+
+ return nsecs_to_jiffies(nsecs);
+}
+
/*
* Essentially the entry function to the journaling code. When bcachefs is doing
* a btree insert, it calls this function to get the current journal write.
@@ -612,17 +621,31 @@ out:
* btree node write locks.
*/
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
- unsigned flags)
+ unsigned flags,
+ struct btree_trans *trans)
{
int ret;
if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
(flags & JOURNAL_RES_GET_NONBLOCK),
- HZ * 10))
+ HZ))
return ret;
+ if (trans)
+ bch2_trans_unlock_long(trans);
+
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10);
+
+ remaining_wait = max(0, remaining_wait - HZ);
+
+ if (closure_wait_event_timeout(&j->async_wait,
+ (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+ (flags & JOURNAL_RES_GET_NONBLOCK),
+ remaining_wait))
+ return ret;
+
struct printbuf buf = PRINTBUF;
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
@@ -727,7 +750,7 @@ recheck_need_open:
* livelock:
*/
sched_annotate_sleep();
- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;
@@ -848,7 +871,7 @@ out:
static int __bch2_journal_meta(struct journal *j)
{
struct journal_res res = {};
- int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;
@@ -1602,54 +1625,3 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
__bch2_journal_debug_to_text(out, j);
spin_unlock(&j->lock);
}
-
-bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
-{
- struct journal_entry_pin_list *pin_list;
- struct journal_entry_pin *pin;
-
- spin_lock(&j->lock);
- if (!test_bit(JOURNAL_running, &j->flags)) {
- spin_unlock(&j->lock);
- return true;
- }
-
- *seq = max(*seq, j->pin.front);
-
- if (*seq >= j->pin.back) {
- spin_unlock(&j->lock);
- return true;
- }
-
- out->atomic++;
-
- pin_list = journal_seq_pin(j, *seq);
-
- prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
- printbuf_indent_add(out, 2);
-
- for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++)
- list_for_each_entry(pin, &pin_list->list[i], list)
- prt_printf(out, "\t%px %ps\n", pin, pin->flush);
-
- if (!list_empty(&pin_list->flushed))
- prt_printf(out, "flushed:\n");
-
- list_for_each_entry(pin, &pin_list->flushed, list)
- prt_printf(out, "\t%px %ps\n", pin, pin->flush);
-
- printbuf_indent_sub(out, 2);
-
- --out->atomic;
- spin_unlock(&j->lock);
-
- return false;
-}
-
-void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
-{
- u64 seq = 0;
-
- while (!bch2_journal_seq_pins_to_text(out, j, &seq))
- seq++;
-}
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index cb0df0663946..dccddd5420ad 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j,
}
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
- unsigned);
+ unsigned, struct btree_trans *);
/* First bits for BCH_WATERMARK: */
enum journal_res_flags {
@@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j,
}
static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
- unsigned u64s, unsigned flags)
+ unsigned u64s, unsigned flags,
+ struct btree_trans *trans)
{
int ret;
@@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
if (journal_res_get_fast(j, res, flags))
goto out;
- ret = bch2_journal_res_get_slowpath(j, res, flags);
+ ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
if (ret)
return ret;
out:
@@ -429,8 +430,6 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u
void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
-void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
-bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 7f2efe85a805..11c39e0c34f4 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -17,6 +17,7 @@
#include "sb-clean.h"
#include "trace.h"
+#include <linux/ioprio.h>
#include <linux/string_choices.h>
void bch2_journal_pos_from_member_info_set(struct bch_fs *c)
@@ -1763,6 +1764,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
bio->bi_iter.bi_sector = ptr->offset;
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0);
BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector);
ca->prev_journal_sector = bio->bi_iter.bi_sector;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 3c8242606da7..6a9cefb635d6 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -327,8 +327,10 @@ void bch2_journal_reclaim_fast(struct journal *j)
popped = true;
}
- if (popped)
+ if (popped) {
bch2_journal_space_available(j);
+ __closure_wake_up(&j->reclaim_flush_wait);
+ }
}
bool __bch2_journal_pin_put(struct journal *j, u64 seq)
@@ -362,6 +364,9 @@ static inline bool __journal_pin_drop(struct journal *j,
pin->seq = 0;
list_del_init(&pin->list);
+ if (j->reclaim_flush_wait.list.first)
+ __closure_wake_up(&j->reclaim_flush_wait);
+
/*
* Unpinning a journal entry may make journal_next_bucket() succeed, if
* writing a new last_seq will now make another bucket available:
@@ -383,11 +388,11 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
fn == bch2_btree_node_flush1)
- return JOURNAL_PIN_btree;
+ return JOURNAL_PIN_TYPE_btree;
else if (fn == bch2_btree_key_cache_journal_flush)
- return JOURNAL_PIN_key_cache;
+ return JOURNAL_PIN_TYPE_key_cache;
else
- return JOURNAL_PIN_other;
+ return JOURNAL_PIN_TYPE_other;
}
static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq,
@@ -406,7 +411,12 @@ static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq,
atomic_inc(&pin_list->count);
pin->seq = seq;
pin->flush = flush_fn;
- list_add(&pin->list, &pin_list->list[type]);
+
+ if (list_empty(&pin_list->unflushed[type]) &&
+ j->reclaim_flush_wait.list.first)
+ __closure_wake_up(&j->reclaim_flush_wait);
+
+ list_add(&pin->list, &pin_list->unflushed[type]);
}
void bch2_journal_pin_copy(struct journal *j,
@@ -499,16 +509,15 @@ journal_get_next_pin(struct journal *j,
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
- unsigned i;
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
if (*seq > seq_to_flush && !allowed_above_seq)
break;
- for (i = 0; i < JOURNAL_PIN_NR; i++)
- if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
- ((1U << i) & allowed_above_seq)) {
- ret = list_first_entry_or_null(&pin_list->list[i],
+ for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++)
+ if (((BIT(i) & allowed_below_seq) && *seq <= seq_to_flush) ||
+ (BIT(i) & allowed_above_seq)) {
+ ret = list_first_entry_or_null(&pin_list->unflushed[i],
struct journal_entry_pin, list);
if (ret)
return ret;
@@ -544,8 +553,8 @@ static size_t journal_flush_pins(struct journal *j,
}
if (min_key_cache) {
- allowed_above |= 1U << JOURNAL_PIN_key_cache;
- allowed_below |= 1U << JOURNAL_PIN_key_cache;
+ allowed_above |= BIT(JOURNAL_PIN_TYPE_key_cache);
+ allowed_below |= BIT(JOURNAL_PIN_TYPE_key_cache);
}
cond_resched();
@@ -553,7 +562,9 @@ static size_t journal_flush_pins(struct journal *j,
j->last_flushed = jiffies;
spin_lock(&j->lock);
- pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
+ pin = journal_get_next_pin(j, seq_to_flush,
+ allowed_below,
+ allowed_above, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;
@@ -576,7 +587,7 @@ static size_t journal_flush_pins(struct journal *j,
spin_lock(&j->lock);
/* Pin might have been dropped or rearmed: */
if (likely(!err && !j->flush_in_progress_dropped))
- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
j->flush_in_progress = NULL;
j->flush_in_progress_dropped = false;
spin_unlock(&j->lock);
@@ -816,10 +827,41 @@ int bch2_journal_reclaim_start(struct journal *j)
return 0;
}
+static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush,
+ unsigned types)
+{
+ struct journal_entry_pin_list *pin_list;
+ u64 seq;
+
+ spin_lock(&j->lock);
+ fifo_for_each_entry_ptr(pin_list, &j->pin, seq) {
+ if (seq > seq_to_flush)
+ break;
+
+ for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++)
+ if ((BIT(i) & types) &&
+ (!list_empty(&pin_list->unflushed[i]) ||
+ !list_empty(&pin_list->flushed[i]))) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+ }
+ spin_unlock(&j->lock);
+
+ return false;
+}
+
+static bool journal_flush_pins_or_still_flushing(struct journal *j, u64 seq_to_flush,
+ unsigned types)
+{
+ return journal_flush_pins(j, seq_to_flush, types, 0, 0, 0) ||
+ journal_pins_still_flushing(j, seq_to_flush, types);
+}
+
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
bool *did_work)
{
- int ret;
+ int ret = 0;
ret = bch2_journal_error(j);
if (ret)
@@ -827,12 +869,18 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
- if (journal_flush_pins(j, seq_to_flush,
- (1U << JOURNAL_PIN_key_cache)|
- (1U << JOURNAL_PIN_other), 0, 0, 0) ||
- journal_flush_pins(j, seq_to_flush,
- (1U << JOURNAL_PIN_btree), 0, 0, 0))
+ if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
+ BIT(JOURNAL_PIN_TYPE_key_cache)|
+ BIT(JOURNAL_PIN_TYPE_other))) {
+ *did_work = true;
+ goto unlock;
+ }
+
+ if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
+ BIT(JOURNAL_PIN_TYPE_btree))) {
*did_work = true;
+ goto unlock;
+ }
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
@@ -847,6 +895,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
!fifo_used(&j->pin);
spin_unlock(&j->lock);
+unlock:
mutex_unlock(&j->reclaim_lock);
return ret;
@@ -860,7 +909,7 @@ bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
if (!test_bit(JOURNAL_running, &j->flags))
return false;
- closure_wait_event(&j->async_wait,
+ closure_wait_event(&j->reclaim_flush_wait,
journal_flush_done(j, seq_to_flush, &did_work));
return did_work;
@@ -926,3 +975,54 @@ err:
return ret;
}
+
+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
+{
+ struct journal_entry_pin_list *pin_list;
+ struct journal_entry_pin *pin;
+
+ spin_lock(&j->lock);
+ if (!test_bit(JOURNAL_running, &j->flags)) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
+ *seq = max(*seq, j->pin.front);
+
+ if (*seq >= j->pin.back) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
+ out->atomic++;
+
+ pin_list = journal_seq_pin(j, *seq);
+
+ prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
+ printbuf_indent_add(out, 2);
+
+ prt_printf(out, "unflushed:\n");
+ for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++)
+ list_for_each_entry(pin, &pin_list->unflushed[i], list)
+ prt_printf(out, "\t%px %ps\n", pin, pin->flush);
+
+ prt_printf(out, "flushed:\n");
+ for (unsigned i = 0; i < ARRAY_SIZE(pin_list->flushed); i++)
+ list_for_each_entry(pin, &pin_list->flushed[i], list)
+ prt_printf(out, "\t%px %ps\n", pin, pin->flush);
+
+ printbuf_indent_sub(out, 2);
+
+ --out->atomic;
+ spin_unlock(&j->lock);
+
+ return false;
+}
+
+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+{
+ u64 seq = 0;
+
+ while (!bch2_journal_seq_pins_to_text(out, j, &seq))
+ seq++;
+}
diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h
index ec84c3345281..0a73d7134e1c 100644
--- a/fs/bcachefs/journal_reclaim.h
+++ b/fs/bcachefs/journal_reclaim.h
@@ -78,4 +78,7 @@ static inline bool bch2_journal_flush_all_pins(struct journal *j)
int bch2_journal_flush_device_pins(struct journal *, int);
+void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
+
#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index e9bd716fbb71..3ba433a48eb8 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -53,15 +53,15 @@ struct journal_buf {
*/
enum journal_pin_type {
- JOURNAL_PIN_btree,
- JOURNAL_PIN_key_cache,
- JOURNAL_PIN_other,
- JOURNAL_PIN_NR,
+ JOURNAL_PIN_TYPE_btree,
+ JOURNAL_PIN_TYPE_key_cache,
+ JOURNAL_PIN_TYPE_other,
+ JOURNAL_PIN_TYPE_NR,
};
struct journal_entry_pin_list {
- struct list_head list[JOURNAL_PIN_NR];
- struct list_head flushed;
+ struct list_head unflushed[JOURNAL_PIN_TYPE_NR];
+ struct list_head flushed[JOURNAL_PIN_TYPE_NR];
atomic_t count;
struct bch_devs_list devs;
};
@@ -226,6 +226,7 @@ struct journal {
/* Used when waiting because the journal was full */
wait_queue_head_t wait;
struct closure_waitlist async_wait;
+ struct closure_waitlist reclaim_flush_wait;
struct delayed_work write_work;
struct workqueue_struct *wq;
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 85c361e78ba5..21805509ab9e 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -215,7 +215,8 @@ static int bch2_copygc(struct moving_context *ctxt,
};
move_buckets buckets = { 0 };
struct move_bucket_in_flight *f;
- u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
+ u64 sectors_seen = atomic64_read(&ctxt->stats->sectors_seen);
+ u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;
ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets);
@@ -245,7 +246,6 @@ static int bch2_copygc(struct moving_context *ctxt,
*did_work = true;
}
err:
- darray_exit(&buckets);
/* no entries in LRU btree found, or got to end: */
if (bch2_err_matches(ret, ENOENT))
@@ -254,8 +254,11 @@ err:
if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "from bch2_move_data()");
- moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
- trace_and_count(c, copygc, c, moved, 0, 0, 0);
+ sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen;
+ sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved;
+ trace_and_count(c, copygc, c, buckets.nr, sectors_seen, sectors_moved);
+
+ darray_exit(&buckets);
return ret;
}
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index e763d52e0f38..a182b5d454ba 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -476,13 +476,13 @@ enum fsck_err_opts {
NULL, "Enable nocow mode: enables runtime locking in\n"\
"data move path needed if nocow will ever be in use\n")\
x(copygc_enabled, u8, \
- OPT_FS|OPT_MOUNT, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable copygc: disable for debugging, or to\n"\
"quiet the system when doing performance testing\n")\
x(rebalance_enabled, u8, \
- OPT_FS|OPT_MOUNT, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable rebalance: disable for debugging, or to\n"\
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 0b4fe899209b..ea0a18364751 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -57,7 +57,7 @@ enum bch_fsck_flags {
x(bset_wrong_sector_offset, 44, 0) \
x(bset_empty, 45, 0) \
x(bset_bad_seq, 46, 0) \
- x(bset_blacklisted_journal_seq, 47, 0) \
+ x(bset_blacklisted_journal_seq, 47, FSCK_AUTOFIX) \
x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \
x(btree_node_bad_btree, 49, 0) \
x(btree_node_bad_level, 50, 0) \
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index 8c2c5539de2e..d78451c2a0c6 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -31,11 +31,11 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir
}
}
-static int fsck_rename_dirent(struct btree_trans *trans,
- struct snapshots_seen *s,
- const struct bch_hash_desc desc,
- struct bch_hash_info *hash_info,
- struct bkey_s_c_dirent old)
+static noinline int fsck_rename_dirent(struct btree_trans *trans,
+ struct snapshots_seen *s,
+ const struct bch_hash_desc desc,
+ struct bch_hash_info *hash_info,
+ struct bkey_s_c_dirent old)
{
struct qstr old_name = bch2_dirent_get_name(old);
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
@@ -71,11 +71,11 @@ static int fsck_rename_dirent(struct btree_trans *trans,
return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
}
-static int hash_pick_winner(struct btree_trans *trans,
- const struct bch_hash_desc desc,
- struct bch_hash_info *hash_info,
- struct bkey_s_c k1,
- struct bkey_s_c k2)
+static noinline int hash_pick_winner(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ struct bch_hash_info *hash_info,
+ struct bkey_s_c k1,
+ struct bkey_s_c k2)
{
if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
!memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
@@ -142,8 +142,8 @@ fsck_err:
* All versions of the same inode in different snapshots must have the same hash
* seed/type: verify that the hash info we're using matches the root
*/
-static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
- struct bch_hash_info *hash_info)
+static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
+ struct bch_hash_info *hash_info)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 9d40b7d4ea29..56a5a7fbc0fd 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -902,32 +902,30 @@ TRACE_EVENT(evacuate_bucket,
TRACE_EVENT(copygc,
TP_PROTO(struct bch_fs *c,
- u64 sectors_moved, u64 sectors_not_moved,
- u64 buckets_moved, u64 buckets_not_moved),
- TP_ARGS(c,
- sectors_moved, sectors_not_moved,
- buckets_moved, buckets_not_moved),
+ u64 buckets,
+ u64 sectors_seen,
+ u64 sectors_moved),
+ TP_ARGS(c, buckets, sectors_seen, sectors_moved),
TP_STRUCT__entry(
__field(dev_t, dev )
+ __field(u64, buckets )
+ __field(u64, sectors_seen )
__field(u64, sectors_moved )
- __field(u64, sectors_not_moved )
- __field(u64, buckets_moved )
- __field(u64, buckets_not_moved )
),
TP_fast_assign(
__entry->dev = c->dev;
+ __entry->buckets = buckets;
+ __entry->sectors_seen = sectors_seen;
__entry->sectors_moved = sectors_moved;
- __entry->sectors_not_moved = sectors_not_moved;
- __entry->buckets_moved = buckets_moved;
- __entry->buckets_not_moved = buckets_moved;
),
- TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu",
+ TP_printk("%d,%d buckets %llu sectors seen %llu moved %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->sectors_moved, __entry->sectors_not_moved,
- __entry->buckets_moved, __entry->buckets_not_moved)
+ __entry->buckets,
+ __entry->sectors_seen,
+ __entry->sectors_moved)
);
TRACE_EVENT(copygc_wait,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bf388e07a02..62e99e65250d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1940,29 +1940,19 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry,
/*
* Check if cached dentry can be trusted.
*/
-static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int ceph_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc;
struct ceph_client *cl = mdsc->fsc->client;
int valid = 0;
- struct dentry *parent;
- struct inode *dir, *inode;
+ struct inode *inode;
- valid = fscrypt_d_revalidate(dentry, flags);
+ valid = fscrypt_d_revalidate(dir, name, dentry, flags);
if (valid <= 0)
return valid;
- if (flags & LOOKUP_RCU) {
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- inode = d_inode_rcu(dentry);
- } else {
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode = d_inode(dentry);
- }
+ inode = d_inode_rcu(dentry);
doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n",
dentry, dentry, inode, ceph_dentry(dentry)->offset,
@@ -2008,6 +1998,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
req->r_parent = dir;
ihold(dir);
+ req->r_dname = name;
+
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
mask |= CEPH_CAP_XATTR_SHARED;
@@ -2038,9 +2030,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? "valid" : "invalid");
if (!valid)
ceph_dir_clear_complete(dir);
-
- if (!(flags & LOOKUP_RCU))
- dput(parent);
return valid;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 785fe489ef4b..d7a06b9aaef6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2621,6 +2621,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
{
struct inode *dir = req->r_parent;
struct dentry *dentry = req->r_dentry;
+ const struct qstr *name = req->r_dname;
u8 *cryptbuf = NULL;
u32 len = 0;
int ret = 0;
@@ -2641,8 +2642,10 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
if (!fscrypt_has_encryption_key(dir))
goto success;
- if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX,
- &len)) {
+ if (!name)
+ name = &dentry->d_name;
+
+ if (!fscrypt_fname_encrypted_size(dir, name->len, NAME_MAX, &len)) {
WARN_ON_ONCE(1);
return ERR_PTR(-ENAMETOOLONG);
}
@@ -2657,7 +2660,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
if (!cryptbuf)
return ERR_PTR(-ENOMEM);
- ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len);
+ ret = fscrypt_fname_encrypt(dir, name, cryptbuf, len);
if (ret) {
kfree(cryptbuf);
return ERR_PTR(ret);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 38bb7e0d2d79..7c9fee9e80d4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -299,6 +299,8 @@ struct ceph_mds_request {
struct inode *r_target_inode; /* resulting inode */
struct inode *r_new_inode; /* new inode (for creates) */
+ const struct qstr *r_dname; /* stable name (for ->d_revalidate) */
+
#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */
#define CEPH_MDS_R_ABORTED (2) /* call was aborted */
#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 4e552ba7bd43..a3e2dfeedfbf 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -445,7 +445,8 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
}
/* called when a cache lookup succeeds */
-static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
+static int coda_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *de, unsigned int flags)
{
struct inode *inode;
struct coda_inode_info *cii;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 0ad52fbe51c9..010f9c0a4c2f 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -574,11 +574,10 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash);
* Validate dentries in encrypted directories to make sure we aren't potentially
* caching stale dentries after a key has been added.
*/
-int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
+int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- struct dentry *dir;
int err;
- int valid;
/*
* Plaintext names are always valid, since fscrypt doesn't support
@@ -591,30 +590,21 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
/*
* No-key name; valid if the directory's key is still unavailable.
*
- * Although fscrypt forbids rename() on no-key names, we still must use
- * dget_parent() here rather than use ->d_parent directly. That's
- * because a corrupted fs image may contain directory hard links, which
- * the VFS handles by moving the directory's dentry tree in the dcache
- * each time ->lookup() finds the directory and it already has a dentry
- * elsewhere. Thus ->d_parent can be changing, and we must safely grab
- * a reference to some ->d_parent to prevent it from being freed.
+ * Note in RCU mode we have to bail if we get here -
+ * fscrypt_get_encryption_info() may block.
*/
if (flags & LOOKUP_RCU)
return -ECHILD;
- dir = dget_parent(dentry);
/*
* Pass allow_unsupported=true, so that files with an unsupported
* encryption policy can be deleted.
*/
- err = fscrypt_get_encryption_info(d_inode(dir), true);
- valid = !fscrypt_has_encryption_key(d_inode(dir));
- dput(dir);
-
+ err = fscrypt_get_encryption_info(dir, true);
if (err < 0)
return err;
- return valid;
+ return !fscrypt_has_encryption_key(dir);
}
EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
diff --git a/fs/dcache.c b/fs/dcache.c
index 1cd929f17eec..9cc0d47da321 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -295,12 +295,16 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
return dentry_string_cmp(cs, ct, tcount);
}
+/*
+ * long names are allocated separately from dentry and never modified.
+ * Refcounted, freeing is RCU-delayed. See take_dentry_name_snapshot()
+ * for the reason why ->count and ->head can't be combined into a union.
+ * dentry_string_cmp() relies upon ->name[] being word-aligned.
+ */
struct external_name {
- union {
- atomic_t count;
- struct rcu_head head;
- } u;
- unsigned char name[];
+ atomic_t count;
+ struct rcu_head head;
+ unsigned char name[] __aligned(sizeof(unsigned long));
};
static inline struct external_name *external_name(struct dentry *dentry)
@@ -324,31 +328,45 @@ static void __d_free_external(struct rcu_head *head)
static inline int dname_external(const struct dentry *dentry)
{
- return dentry->d_name.name != dentry->d_iname;
+ return dentry->d_name.name != dentry->d_shortname.string;
}
void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
{
- spin_lock(&dentry->d_lock);
- name->name = dentry->d_name;
- if (unlikely(dname_external(dentry))) {
- atomic_inc(&external_name(dentry)->u.count);
+ unsigned seq;
+ const unsigned char *s;
+
+ rcu_read_lock();
+retry:
+ seq = read_seqcount_begin(&dentry->d_seq);
+ s = READ_ONCE(dentry->d_name.name);
+ name->name.hash_len = dentry->d_name.hash_len;
+ name->name.name = name->inline_name.string;
+ if (likely(s == dentry->d_shortname.string)) {
+ name->inline_name = dentry->d_shortname;
} else {
- memcpy(name->inline_name, dentry->d_iname,
- dentry->d_name.len + 1);
- name->name.name = name->inline_name;
+ struct external_name *p;
+ p = container_of(s, struct external_name, name[0]);
+ // get a valid reference
+ if (unlikely(!atomic_inc_not_zero(&p->count)))
+ goto retry;
+ name->name.name = s;
}
- spin_unlock(&dentry->d_lock);
+ if (read_seqcount_retry(&dentry->d_seq, seq)) {
+ release_dentry_name_snapshot(name);
+ goto retry;
+ }
+ rcu_read_unlock();
}
EXPORT_SYMBOL(take_dentry_name_snapshot);
void release_dentry_name_snapshot(struct name_snapshot *name)
{
- if (unlikely(name->name.name != name->inline_name)) {
+ if (unlikely(name->name.name != name->inline_name.string)) {
struct external_name *p;
p = container_of(name->name.name, struct external_name, name[0]);
- if (unlikely(atomic_dec_and_test(&p->u.count)))
- kfree_rcu(p, u.head);
+ if (unlikely(atomic_dec_and_test(&p->count)))
+ kfree_rcu(p, head);
}
}
EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -386,7 +404,7 @@ static void dentry_free(struct dentry *dentry)
WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
if (unlikely(dname_external(dentry))) {
struct external_name *p = external_name(dentry);
- if (likely(atomic_dec_and_test(&p->u.count))) {
+ if (likely(atomic_dec_and_test(&p->count))) {
call_rcu(&dentry->d_u.d_rcu, __d_free_external);
return;
}
@@ -1654,10 +1672,10 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
* will still always have a NUL at the end, even if we might
* be overwriting an internal NUL character
*/
- dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
+ dentry->d_shortname.string[DNAME_INLINE_LEN-1] = 0;
if (unlikely(!name)) {
name = &slash_name;
- dname = dentry->d_iname;
+ dname = dentry->d_shortname.string;
} else if (name->len > DNAME_INLINE_LEN-1) {
size_t size = offsetof(struct external_name, name[1]);
struct external_name *p = kmalloc(size + name->len,
@@ -1667,10 +1685,10 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
kmem_cache_free(dentry_cache, dentry);
return NULL;
}
- atomic_set(&p->u.count, 1);
+ atomic_set(&p->count, 1);
dname = p->name;
} else {
- dname = dentry->d_iname;
+ dname = dentry->d_shortname.string;
}
dentry->d_name.len = name->len;
@@ -2728,10 +2746,9 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
* dentry:internal, target:external. Steal target's
* storage and make target internal.
*/
- memcpy(target->d_iname, dentry->d_name.name,
- dentry->d_name.len + 1);
dentry->d_name.name = target->d_name.name;
- target->d_name.name = target->d_iname;
+ target->d_shortname = dentry->d_shortname;
+ target->d_name.name = target->d_shortname.string;
}
} else {
if (unlikely(dname_external(dentry))) {
@@ -2739,20 +2756,16 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
* dentry:external, target:internal. Give dentry's
* storage to target and make dentry internal
*/
- memcpy(dentry->d_iname, target->d_name.name,
- target->d_name.len + 1);
target->d_name.name = dentry->d_name.name;
- dentry->d_name.name = dentry->d_iname;
+ dentry->d_shortname = target->d_shortname;
+ dentry->d_name.name = dentry->d_shortname.string;
} else {
/*
* Both are internal.
*/
- unsigned int i;
- BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
- for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
- swap(((long *) &dentry->d_iname)[i],
- ((long *) &target->d_iname)[i]);
- }
+ for (int i = 0; i < DNAME_INLINE_WORDS; i++)
+ swap(dentry->d_shortname.words[i],
+ target->d_shortname.words[i]);
}
}
swap(dentry->d_name.hash_len, target->d_name.hash_len);
@@ -2764,16 +2777,15 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
if (unlikely(dname_external(dentry)))
old_name = external_name(dentry);
if (unlikely(dname_external(target))) {
- atomic_inc(&external_name(target)->u.count);
+ atomic_inc(&external_name(target)->count);
dentry->d_name = target->d_name;
} else {
- memcpy(dentry->d_iname, target->d_name.name,
- target->d_name.len + 1);
- dentry->d_name.name = dentry->d_iname;
+ dentry->d_shortname = target->d_shortname;
+ dentry->d_name.name = dentry->d_shortname.string;
dentry->d_name.hash_len = target->d_name.hash_len;
}
- if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
- kfree_rcu(old_name, u.head);
+ if (old_name && likely(atomic_dec_and_test(&old_name->count)))
+ kfree_rcu(old_name, head);
}
/*
@@ -2954,7 +2966,12 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias)
goto out_err;
m2 = &alias->d_parent->d_inode->i_rwsem;
out_unalias:
+ if (alias->d_op->d_unalias_trylock &&
+ !alias->d_op->d_unalias_trylock(alias))
+ goto out_err;
__d_move(alias, dentry, false);
+ if (alias->d_op->d_unalias_unlock)
+ alias->d_op->d_unalias_unlock(alias);
ret = 0;
out_err:
if (m2)
@@ -3102,12 +3119,12 @@ void d_mark_tmpfile(struct file *file, struct inode *inode)
{
struct dentry *dentry = file->f_path.dentry;
- BUG_ON(dentry->d_name.name != dentry->d_iname ||
+ BUG_ON(dname_external(dentry) ||
!hlist_unhashed(&dentry->d_u.d_alias) ||
!d_unlinked(dentry));
spin_lock(&dentry->d_parent->d_lock);
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
+ dentry->d_name.len = sprintf(dentry->d_shortname.string, "#%llu",
(unsigned long long)inode->i_ino);
spin_unlock(&dentry->d_lock);
spin_unlock(&dentry->d_parent->d_lock);
@@ -3195,7 +3212,7 @@ static void __init dcache_init(void)
*/
dentry_cache = KMEM_CACHE_USERCOPY(dentry,
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT,
- d_iname);
+ d_shortname.string);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index acaa0825e9bb..1dfd5b81d831 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -17,7 +17,9 @@
/**
* ecryptfs_d_revalidate - revalidate an ecryptfs dentry
- * @dentry: The ecryptfs dentry
+ * @dir: inode of expected parent
+ * @name: expected name
+ * @dentry: dentry to revalidate
* @flags: lookup flags
*
* Called when the VFS needs to revalidate a dentry. This
@@ -28,7 +30,8 @@
* Returns 1 if valid, 0 otherwise.
*
*/
-static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int ecryptfs_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
int rc = 1;
@@ -36,8 +39,15 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE)
- rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+ if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) {
+ struct inode *lower_dir = ecryptfs_inode_to_lower(dir);
+ struct name_snapshot n;
+
+ take_dentry_name_snapshot(&n, lower_dentry);
+ rc = lower_dentry->d_op->d_revalidate(lower_dir, &n.name,
+ lower_dentry, flags);
+ release_dentry_name_snapshot(&n);
+ }
if (d_really_is_positive(dentry)) {
struct inode *inode = d_inode(dentry);
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 099f80645072..691dd77b6ab5 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -31,10 +31,9 @@ static inline void exfat_d_version_set(struct dentry *dentry,
* If it happened, the negative dentry isn't actually negative anymore. So,
* drop it.
*/
-static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int exfat_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- int ret;
-
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -58,11 +57,7 @@ static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
return 0;
- spin_lock(&dentry->d_lock);
- ret = inode_eq_iversion(d_inode(dentry->d_parent),
- exfat_d_version(dentry));
- spin_unlock(&dentry->d_lock);
- return ret;
+ return inode_eq_iversion(dir, exfat_d_version(dentry));
}
/* returns the length of a struct qstr, ignoring trailing dots if necessary */
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 26c4fc37edcf..da4263a14a20 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -322,9 +322,7 @@ restart:
WARN_ON(!list_empty(&ei->i_fc_dilist));
spin_unlock(&sbi->s_fc_lock);
- if (fc_dentry->fcd_name.name &&
- fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
- kfree(fc_dentry->fcd_name.name);
+ release_dentry_name_snapshot(&fc_dentry->fcd_name);
kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
return;
@@ -449,22 +447,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
node->fcd_op = dentry_update->op;
node->fcd_parent = dir->i_ino;
node->fcd_ino = inode->i_ino;
- if (dentry->d_name.len > DNAME_INLINE_LEN) {
- node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
- if (!node->fcd_name.name) {
- kmem_cache_free(ext4_fc_dentry_cachep, node);
- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle);
- mutex_lock(&ei->i_fc_lock);
- return -ENOMEM;
- }
- memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
- dentry->d_name.len);
- } else {
- memcpy(node->fcd_iname, dentry->d_name.name,
- dentry->d_name.len);
- node->fcd_name.name = node->fcd_iname;
- }
- node->fcd_name.len = dentry->d_name.len;
+ take_dentry_name_snapshot(&node->fcd_name, dentry);
INIT_LIST_HEAD(&node->fcd_dilist);
spin_lock(&sbi->s_fc_lock);
if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
@@ -832,7 +815,7 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
{
struct ext4_fc_dentry_info fcd;
struct ext4_fc_tl tl;
- int dlen = fc_dentry->fcd_name.len;
+ int dlen = fc_dentry->fcd_name.name.len;
u8 *dst = ext4_fc_reserve_space(sb,
EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
@@ -847,7 +830,7 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
dst += EXT4_FC_TAG_BASE_LEN;
memcpy(dst, &fcd, sizeof(fcd));
dst += sizeof(fcd);
- memcpy(dst, fc_dentry->fcd_name.name, dlen);
+ memcpy(dst, fc_dentry->fcd_name.name.name, dlen);
return true;
}
@@ -1328,9 +1311,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
list_del_init(&fc_dentry->fcd_dilist);
spin_unlock(&sbi->s_fc_lock);
- if (fc_dentry->fcd_name.name &&
- fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
- kfree(fc_dentry->fcd_name.name);
+ release_dentry_name_snapshot(&fc_dentry->fcd_name);
kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
spin_lock(&sbi->s_fc_lock);
}
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 2fadb2c4780c..3bd534e4dbbf 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -109,8 +109,7 @@ struct ext4_fc_dentry_update {
int fcd_op; /* Type of update create / unlink / link */
int fcd_parent; /* Parent inode number */
int fcd_ino; /* Inode number */
- struct qstr fcd_name; /* Dirent name */
- unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
+ struct name_snapshot fcd_name; /* Dirent name */
struct list_head fcd_list;
struct list_head fcd_dilist;
};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 15bf32c21ac0..926c26e90ef8 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,17 +43,13 @@ static inline void vfat_d_version_set(struct dentry *dentry,
* If it happened, the negative dentry isn't actually negative
* anymore. So, drop it.
*/
-static int vfat_revalidate_shortname(struct dentry *dentry)
+static bool vfat_revalidate_shortname(struct dentry *dentry, struct inode *dir)
{
- int ret = 1;
- spin_lock(&dentry->d_lock);
- if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry)))
- ret = 0;
- spin_unlock(&dentry->d_lock);
- return ret;
+ return inode_eq_iversion(dir, vfat_d_version(dentry));
}
-static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
+static int vfat_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -61,10 +57,11 @@ static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
/* This is not negative dentry. Always valid. */
if (d_really_is_positive(dentry))
return 1;
- return vfat_revalidate_shortname(dentry);
+ return vfat_revalidate_shortname(dentry, dir);
}
-static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
+static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -97,7 +94,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
return 0;
- return vfat_revalidate_shortname(dentry);
+ return vfat_revalidate_shortname(dentry, dir);
}
/* returns the length of a struct qstr, ignoring trailing dots */
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index be693a8a1010..198862b086ff 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -175,10 +175,12 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
memset(outarg, 0, sizeof(struct fuse_entry_out));
args->opcode = FUSE_LOOKUP;
args->nodeid = nodeid;
- args->in_numargs = 2;
+ args->in_numargs = 3;
fuse_set_zero_arg0(args);
- args->in_args[1].size = name->len + 1;
+ args->in_args[1].size = name->len;
args->in_args[1].value = name->name;
+ args->in_args[2].size = 1;
+ args->in_args[2].value = "";
args->out_numargs = 1;
args->out_args[0].size = sizeof(struct fuse_entry_out);
args->out_args[0].value = outarg;
@@ -193,10 +195,10 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
* the lookup once more. If the lookup results in the same inode,
* then refresh the attributes, timeouts and mark the dentry valid.
*/
-static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *entry, unsigned int flags)
{
struct inode *inode;
- struct dentry *parent;
struct fuse_mount *fm;
struct fuse_inode *fi;
int ret;
@@ -228,11 +230,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
attr_version = fuse_get_attr_version(fm->fc);
- parent = dget_parent(entry);
- fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
- &entry->d_name, &outarg);
+ fuse_lookup_init(fm->fc, &args, get_node_id(dir),
+ name, &outarg);
ret = fuse_simple_request(fm, &args);
- dput(parent);
/* Zero nodeid is same as -ENOENT */
if (!ret && !outarg.nodeid)
ret = -ENOENT;
@@ -266,9 +266,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
return -ECHILD;
} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
- parent = dget_parent(entry);
- fuse_advise_use_readdirplus(d_inode(parent));
- dput(parent);
+ fuse_advise_use_readdirplus(dir);
}
}
ret = 1;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 2e215e8c3c88..95050e719233 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -21,7 +21,9 @@
/**
* gfs2_drevalidate - Check directory lookup consistency
- * @dentry: the mapping to check
+ * @dir: expected parent directory inode
+ * @name: expexted name
+ * @dentry: dentry to check
* @flags: lookup flags
*
* Check to make sure the lookup necessary to arrive at this inode from its
@@ -30,50 +32,43 @@
* Returns: 1 if the dentry is ok, 0 if it isn't
*/
-static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
+static int gfs2_drevalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent;
- struct gfs2_sbd *sdp;
- struct gfs2_inode *dip;
+ struct gfs2_sbd *sdp = GFS2_SB(dir);
+ struct gfs2_inode *dip = GFS2_I(dir);
struct inode *inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip = NULL;
- int error, valid = 0;
+ int error, valid;
int had_lock = 0;
if (flags & LOOKUP_RCU)
return -ECHILD;
- parent = dget_parent(dentry);
- sdp = GFS2_SB(d_inode(parent));
- dip = GFS2_I(d_inode(parent));
inode = d_inode(dentry);
if (inode) {
if (is_bad_inode(inode))
- goto out;
+ return 0;
ip = GFS2_I(inode);
}
- if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) {
- valid = 1;
- goto out;
- }
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ return 1;
had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
if (!had_lock) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
- goto out;
+ return 0;
}
- error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip);
+ error = gfs2_dir_check(dir, name, ip);
valid = inode ? !error : (error == -ENOENT);
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
-out:
- dput(parent);
return valid;
}
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 76fa02e3835b..ef54fc8093cf 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -13,7 +13,8 @@
/* dentry case-handling: just lowercase everything */
-static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
+static int hfs_revalidate_dentry(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
int diff;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index d68a4e6ac345..fc8ede43afde 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1576,7 +1576,8 @@ out:
return result;
}
-static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags)
+static int jfs_ci_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
/*
* This is not negative dentry. Always valid.
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 458519e416fe..5f0f8b95f44c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1109,7 +1109,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
return ERR_PTR(rc);
}
-static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
+static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct kernfs_node *kn;
struct kernfs_root *root;
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b6120b19e99..8444f5cc4064 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1782,7 +1782,7 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
{
const struct dentry *parent;
const struct inode *dir;
- char strbuf[DNAME_INLINE_LEN];
+ union shortname_store strbuf;
struct qstr qstr;
/*
@@ -1802,22 +1802,23 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
if (!dir || !IS_CASEFOLDED(dir))
return 1;
+ qstr.len = len;
+ qstr.name = str;
/*
* If the dentry name is stored in-line, then it may be concurrently
* modified by a rename. If this happens, the VFS will eventually retry
* the lookup, so it doesn't matter what ->d_compare() returns.
* However, it's unsafe to call utf8_strncasecmp() with an unstable
* string. Therefore, we have to copy the name into a temporary buffer.
+ * As above, len is guaranteed to match str, so the shortname case
+ * is exactly when str points to ->d_shortname.
*/
- if (len <= DNAME_INLINE_LEN - 1) {
- memcpy(strbuf, str, len);
- strbuf[len] = 0;
- str = strbuf;
+ if (qstr.name == dentry->d_shortname.string) {
+ strbuf = dentry->d_shortname; // NUL is guaranteed to be in there
+ qstr.name = strbuf.string;
/* prevent compiler from optimizing out the temporary buffer */
barrier();
}
- qstr.len = len;
- qstr.name = str;
return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr);
}
diff --git a/fs/namei.c b/fs/namei.c
index 8c82afddd2ad..3ab9440c5b93 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -921,10 +921,11 @@ out_dput:
return false;
}
-static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
+static inline int d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
- return dentry->d_op->d_revalidate(dentry, flags);
+ return dentry->d_op->d_revalidate(dir, name, dentry, flags);
else
return 1;
}
@@ -1652,7 +1653,7 @@ static struct dentry *lookup_dcache(const struct qstr *name,
{
struct dentry *dentry = d_lookup(dir, name);
if (dentry) {
- int error = d_revalidate(dentry, flags);
+ int error = d_revalidate(dir->d_inode, name, dentry, flags);
if (unlikely(error <= 0)) {
if (!error)
d_invalidate(dentry);
@@ -1737,19 +1738,20 @@ static struct dentry *lookup_fast(struct nameidata *nd)
if (read_seqcount_retry(&parent->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
- status = d_revalidate(dentry, nd->flags);
+ status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags);
if (likely(status > 0))
return dentry;
if (!try_to_unlazy_next(nd, dentry))
return ERR_PTR(-ECHILD);
if (status == -ECHILD)
/* we'd been told to redo it in non-rcu mode */
- status = d_revalidate(dentry, nd->flags);
+ status = d_revalidate(nd->inode, &nd->last,
+ dentry, nd->flags);
} else {
dentry = __d_lookup(parent, &nd->last);
if (unlikely(!dentry))
return NULL;
- status = d_revalidate(dentry, nd->flags);
+ status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags);
}
if (unlikely(status <= 0)) {
if (!status)
@@ -1777,7 +1779,7 @@ again:
if (IS_ERR(dentry))
return dentry;
if (unlikely(!d_in_lookup(dentry))) {
- int error = d_revalidate(dentry, flags);
+ int error = d_revalidate(inode, name, dentry, flags);
if (unlikely(error <= 0)) {
if (!error) {
d_invalidate(dentry);
@@ -3575,7 +3577,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
if (d_in_lookup(dentry))
break;
- error = d_revalidate(dentry, nd->flags);
+ error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags);
if (likely(error > 0))
break;
if (error)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 492cffd9d3d8..2b04038b0e40 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1672,7 +1672,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
-static int nfs_lookup_revalidate_dentry(struct inode *dir,
+static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name,
struct dentry *dentry,
struct inode *inode, unsigned int flags)
{
@@ -1690,7 +1690,7 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir,
goto out;
dir_verifier = nfs_save_change_attribute(dir);
- ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr);
if (ret < 0)
goto out;
@@ -1732,8 +1732,8 @@ out:
* cached dentry and do a new lookup.
*/
static int
-nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
int error = 0;
@@ -1775,7 +1775,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
if (NFS_STALE(inode))
goto out_bad;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
out_valid:
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
@@ -1785,38 +1785,26 @@ out_bad:
}
static int
-__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
- int (*reval)(struct inode *, struct dentry *, unsigned int))
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent;
- struct inode *dir;
- int ret;
-
if (flags & LOOKUP_RCU) {
if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
return -ECHILD;
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- ret = reval(dir, dentry, flags);
- if (parent != READ_ONCE(dentry->d_parent))
- return -ECHILD;
} else {
/* Wait for unlink to complete - see unblock_revalidate() */
wait_var_event(&dentry->d_fsdata,
smp_load_acquire(&dentry->d_fsdata)
!= NFS_FSDATA_BLOCKED);
- parent = dget_parent(dentry);
- ret = reval(d_inode(parent), dentry, flags);
- dput(parent);
}
- return ret;
+ return 0;
}
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
}
static void block_revalidate(struct dentry *dentry)
@@ -1982,7 +1970,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error == -ENOENT) {
if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
dir_verifier = inode_peek_iversion_raw(dir);
@@ -2025,7 +2014,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
+static int nfs4_lookup_revalidate(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
@@ -2214,11 +2204,14 @@ no_open:
EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int
-nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
@@ -2254,16 +2247,10 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
full_reval:
- return nfs_do_lookup_revalidate(dir, dentry, flags);
-}
-
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
-{
- return __nfs_lookup_revalidate(dentry, flags,
- nfs4_do_lookup_revalidate);
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
}
#endif /* CONFIG_NFSV4 */
@@ -2319,7 +2306,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
d_drop(dentry);
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error)
goto out_error;
}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2d53574da605..973aed9cc5fe 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -308,7 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server)
int err;
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name,
ctx->mntfh, ctx->clone_data.fattr);
dput(parent);
if (err != 0)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7359e1a3bd84..0c3bc98cd999 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
}
static int
-nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
unsigned short task_flags = 0;
@@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call lookup %pd2\n", dentry);
- return __nfs3_proc_lookup(dir, dentry->d_name.name,
- dentry->d_name.len, fhandle, fattr,
+ return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr,
task_flags);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d615d520f8cf..df9669d4ded7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4544,15 +4544,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
int status;
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
.dir_fh = NFS_FH(dir),
- .name = &dentry->d_name,
+ .name = name,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -4594,17 +4594,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
}
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
- const struct qstr *name = &dentry->d_name;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr);
+ err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr);
trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
@@ -4639,13 +4638,13 @@ out:
return err;
}
-static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -4660,7 +4659,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (status < 0)
return ERR_PTR(status);
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 6c09cd090c34..77920a2e3cef 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
- .name = dentry->d_name.name,
- .len = dentry->d_name.len
+ .name = name->name,
+ .len = name->len
};
struct nfs_diropok res = {
.fh = fhandle,
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 8d789b017fa9..af94e3737470 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -787,7 +787,8 @@ pack_runs:
if (err)
goto out;
- attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id);
+ attr = mi_find_attr(ni, mi, NULL, type, name, name_len,
+ &le->id);
if (!attr) {
err = -EINVAL;
goto bad_inode;
@@ -1181,7 +1182,7 @@ repack:
goto out;
}
- attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, &le->id);
+ attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, &le->id);
if (!attr) {
err = -EINVAL;
goto out;
@@ -1406,7 +1407,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
*/
if (!attr->non_res) {
if (vbo[1] + bytes_per_off > le32_to_cpu(attr->res.data_size)) {
- ntfs_inode_err(&ni->vfs_inode, "is corrupted");
+ _ntfs_bad_inode(&ni->vfs_inode);
return -EINVAL;
}
addr = resident_data(attr);
@@ -1796,7 +1797,7 @@ repack:
goto out;
}
- attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0,
+ attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0,
&le->id);
if (!attr) {
err = -EINVAL;
@@ -2041,8 +2042,8 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
}
/* Look for required attribute. */
- attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL,
- 0, &le->id);
+ attr = mi_find_attr(ni, mi, NULL, ATTR_DATA,
+ NULL, 0, &le->id);
if (!attr) {
err = -EINVAL;
goto out;
@@ -2587,7 +2588,7 @@ int attr_force_nonresident(struct ntfs_inode *ni)
attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, &mi);
if (!attr) {
- ntfs_bad_inode(&ni->vfs_inode, "no data attribute");
+ _ntfs_bad_inode(&ni->vfs_inode);
return -ENOENT;
}
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index fc6a8aa29e3a..b6da80c69ca6 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -512,7 +512,7 @@ out:
ctx->pos = pos;
} else if (err < 0) {
if (err == -EINVAL)
- ntfs_inode_err(dir, "directory corrupted");
+ _ntfs_bad_inode(dir);
ctx->pos = eod;
}
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 8b39d0ce5f28..5df6a0b5add9 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -75,7 +75,7 @@ struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni)
{
const struct ATTRIB *attr;
- attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
+ attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO)) :
NULL;
}
@@ -89,7 +89,7 @@ struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni)
{
const struct ATTRIB *attr;
- attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
+ attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5)) :
NULL;
@@ -148,8 +148,10 @@ int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi)
goto out;
err = mi_get(ni->mi.sbi, rno, &r);
- if (err)
+ if (err) {
+ _ntfs_bad_inode(&ni->vfs_inode);
return err;
+ }
ni_add_mi(ni, r);
@@ -201,7 +203,8 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr,
*mi = &ni->mi;
/* Look for required attribute in primary record. */
- return mi_find_attr(&ni->mi, attr, type, name, name_len, NULL);
+ return mi_find_attr(ni, &ni->mi, attr, type, name, name_len,
+ NULL);
}
/* First look for list entry of required type. */
@@ -217,7 +220,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr,
return NULL;
/* Look for required attribute. */
- attr = mi_find_attr(m, NULL, type, name, name_len, &le->id);
+ attr = mi_find_attr(ni, m, NULL, type, name, name_len, &le->id);
if (!attr)
goto out;
@@ -238,8 +241,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr,
return attr;
out:
- ntfs_inode_err(&ni->vfs_inode, "failed to parse mft record");
- ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
+ _ntfs_bad_inode(&ni->vfs_inode);
return NULL;
}
@@ -259,7 +261,7 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr,
if (mi)
*mi = &ni->mi;
/* Enum attributes in primary record. */
- return mi_enum_attr(&ni->mi, attr);
+ return mi_enum_attr(ni, &ni->mi, attr);
}
/* Get next list entry. */
@@ -275,7 +277,7 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr,
*mi = mi2;
/* Find attribute in loaded record. */
- return rec_find_attr_le(mi2, le2);
+ return rec_find_attr_le(ni, mi2, le2);
}
/*
@@ -293,7 +295,8 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
if (!ni->attr_list.size) {
if (pmi)
*pmi = &ni->mi;
- return mi_find_attr(&ni->mi, NULL, type, name, name_len, NULL);
+ return mi_find_attr(ni, &ni->mi, NULL, type, name, name_len,
+ NULL);
}
le = al_find_ex(ni, NULL, type, name, name_len, NULL);
@@ -319,7 +322,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
if (pmi)
*pmi = mi;
- attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id);
+ attr = mi_find_attr(ni, mi, NULL, type, name, name_len, &le->id);
if (!attr)
return NULL;
@@ -330,6 +333,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
vcn <= le64_to_cpu(attr->nres.evcn))
return attr;
+ _ntfs_bad_inode(&ni->vfs_inode);
return NULL;
}
@@ -398,7 +402,8 @@ int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
int diff;
if (base_only || type == ATTR_LIST || !ni->attr_list.size) {
- attr = mi_find_attr(&ni->mi, NULL, type, name, name_len, id);
+ attr = mi_find_attr(ni, &ni->mi, NULL, type, name, name_len,
+ id);
if (!attr)
return -ENOENT;
@@ -437,7 +442,7 @@ next_le2:
al_remove_le(ni, le);
- attr = mi_find_attr(mi, NULL, type, name, name_len, id);
+ attr = mi_find_attr(ni, mi, NULL, type, name, name_len, id);
if (!attr)
return -ENOENT;
@@ -485,7 +490,7 @@ ni_ins_new_attr(struct ntfs_inode *ni, struct mft_inode *mi,
name = le->name;
}
- attr = mi_insert_attr(mi, type, name, name_len, asize, name_off);
+ attr = mi_insert_attr(ni, mi, type, name, name_len, asize, name_off);
if (!attr) {
if (le_added)
al_remove_le(ni, le);
@@ -673,7 +678,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
if (err)
return err;
- attr_list = mi_find_attr(&ni->mi, NULL, ATTR_LIST, NULL, 0, NULL);
+ attr_list = mi_find_attr(ni, &ni->mi, NULL, ATTR_LIST, NULL, 0, NULL);
if (!attr_list)
return 0;
@@ -695,7 +700,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
if (!mi)
return 0;
- attr = mi_find_attr(mi, NULL, le->type, le_name(le),
+ attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le),
le->name_len, &le->id);
if (!attr)
return 0;
@@ -731,7 +736,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
goto out;
}
- attr = mi_find_attr(mi, NULL, le->type, le_name(le),
+ attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le),
le->name_len, &le->id);
if (!attr) {
/* Should never happened, 'cause already checked. */
@@ -740,7 +745,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
asize = le32_to_cpu(attr->size);
/* Insert into primary record. */
- attr_ins = mi_insert_attr(&ni->mi, le->type, le_name(le),
+ attr_ins = mi_insert_attr(ni, &ni->mi, le->type, le_name(le),
le->name_len, asize,
le16_to_cpu(attr->name_off));
if (!attr_ins) {
@@ -768,7 +773,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
if (!mi)
continue;
- attr = mi_find_attr(mi, NULL, le->type, le_name(le),
+ attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le),
le->name_len, &le->id);
if (!attr)
continue;
@@ -831,7 +836,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
free_b = 0;
attr = NULL;
- for (; (attr = mi_enum_attr(&ni->mi, attr)); le = Add2Ptr(le, sz)) {
+ for (; (attr = mi_enum_attr(ni, &ni->mi, attr)); le = Add2Ptr(le, sz)) {
sz = le_size(attr->name_len);
le->type = attr->type;
le->size = cpu_to_le16(sz);
@@ -886,7 +891,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
u32 asize = le32_to_cpu(b->size);
u16 name_off = le16_to_cpu(b->name_off);
- attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off),
+ attr = mi_insert_attr(ni, mi, b->type, Add2Ptr(b, name_off),
b->name_len, asize, name_off);
if (!attr)
goto out;
@@ -909,7 +914,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
goto out;
}
- attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0,
+ attr = mi_insert_attr(ni, &ni->mi, ATTR_LIST, NULL, 0,
lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT);
if (!attr)
goto out;
@@ -993,13 +998,13 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le,
mi = rb_entry(node, struct mft_inode, node);
if (is_mft_data &&
- (mi_enum_attr(mi, NULL) ||
+ (mi_enum_attr(ni, mi, NULL) ||
vbo <= ((u64)mi->rno << sbi->record_bits))) {
/* We can't accept this record 'cause MFT's bootstrapping. */
continue;
}
if (is_mft &&
- mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, NULL)) {
+ mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, NULL)) {
/*
* This child record already has a ATTR_DATA.
* So it can't accept any other records.
@@ -1008,7 +1013,7 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le,
}
if ((type != ATTR_NAME || name_len) &&
- mi_find_attr(mi, NULL, type, name, name_len, NULL)) {
+ mi_find_attr(ni, mi, NULL, type, name, name_len, NULL)) {
/* Only indexed attributes can share same record. */
continue;
}
@@ -1157,7 +1162,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
/* Estimate the result of moving all possible attributes away. */
attr = NULL;
- while ((attr = mi_enum_attr(&ni->mi, attr))) {
+ while ((attr = mi_enum_attr(ni, &ni->mi, attr))) {
if (attr->type == ATTR_STD)
continue;
if (attr->type == ATTR_LIST)
@@ -1175,7 +1180,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
attr = NULL;
for (;;) {
- attr = mi_enum_attr(&ni->mi, attr);
+ attr = mi_enum_attr(ni, &ni->mi, attr);
if (!attr) {
/* We should never be here 'cause we have already check this case. */
err = -EINVAL;
@@ -1259,7 +1264,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni)
for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) {
mi = rb_entry(node, struct mft_inode, node);
- attr = mi_enum_attr(mi, NULL);
+ attr = mi_enum_attr(ni, mi, NULL);
if (!attr) {
mft_min = mi->rno;
@@ -1280,7 +1285,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni)
ni_remove_mi(ni, mi_new);
}
- attr = mi_find_attr(&ni->mi, NULL, ATTR_DATA, NULL, 0, NULL);
+ attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_DATA, NULL, 0, NULL);
if (!attr) {
err = -EINVAL;
goto out;
@@ -1397,7 +1402,7 @@ int ni_expand_list(struct ntfs_inode *ni)
continue;
/* Find attribute in primary record. */
- attr = rec_find_attr_le(&ni->mi, le);
+ attr = rec_find_attr_le(ni, &ni->mi, le);
if (!attr) {
err = -EINVAL;
goto out;
@@ -1604,8 +1609,8 @@ int ni_delete_all(struct ntfs_inode *ni)
roff = le16_to_cpu(attr->nres.run_off);
if (roff > asize) {
- _ntfs_bad_inode(&ni->vfs_inode);
- return -EINVAL;
+ /* ni_enum_attr_ex checks this case. */
+ continue;
}
/* run==1 means unpack and deallocate. */
@@ -2726,9 +2731,10 @@ int ni_write_frame(struct ntfs_inode *ni, struct page **pages,
{
int err;
struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct folio *folio = page_folio(pages[0]);
u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
u32 frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT;
- u64 frame_vbo = (u64)pages[0]->index << PAGE_SHIFT;
+ u64 frame_vbo = folio_pos(folio);
CLST frame = frame_vbo >> frame_bits;
char *frame_ondisk = NULL;
struct page **pages_disk = NULL;
@@ -3343,7 +3349,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
if (!mi->dirty)
continue;
- is_empty = !mi_enum_attr(mi, NULL);
+ is_empty = !mi_enum_attr(ni, mi, NULL);
if (is_empty)
clear_rec_inuse(mi->mrec);
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 03471bc9371c..938d351ebac7 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -908,7 +908,11 @@ void ntfs_bad_inode(struct inode *inode, const char *hint)
ntfs_inode_err(inode, "%s", hint);
make_bad_inode(inode);
- ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ /* Avoid recursion if bad inode is $Volume. */
+ if (inode->i_ino != MFT_REC_VOL &&
+ !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) {
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ }
}
/*
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 9089c58a005c..7eb9fae22f8d 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1094,8 +1094,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn,
ok:
if (!index_buf_check(ib, bytes, &vbn)) {
- ntfs_inode_err(&ni->vfs_inode, "directory corrupted");
- ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
+ _ntfs_bad_inode(&ni->vfs_inode);
err = -EINVAL;
goto out;
}
@@ -1117,8 +1116,7 @@ ok:
out:
if (err == -E_NTFS_CORRUPT) {
- ntfs_inode_err(&ni->vfs_inode, "directory corrupted");
- ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
+ _ntfs_bad_inode(&ni->vfs_inode);
err = -EINVAL;
}
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index be04d2845bb7..a1e11228dafd 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -410,6 +410,9 @@ end_enum:
if (!std5)
goto out;
+ if (is_bad_inode(inode))
+ goto out;
+
if (!is_match && name) {
err = -ENOENT;
goto out;
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index cd8e8374bb5a..382820464dee 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -745,23 +745,24 @@ int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi);
void mi_put(struct mft_inode *mi);
int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno);
int mi_read(struct mft_inode *mi, bool is_mft);
-struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr);
-// TODO: id?
-struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr,
- enum ATTR_TYPE type, const __le16 *name,
- u8 name_len, const __le16 *id);
-static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec,
+struct ATTRIB *mi_enum_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+ struct ATTRIB *attr);
+struct ATTRIB *mi_find_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+ struct ATTRIB *attr, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, const __le16 *id);
+static inline struct ATTRIB *rec_find_attr_le(struct ntfs_inode *ni,
+ struct mft_inode *rec,
struct ATTR_LIST_ENTRY *le)
{
- return mi_find_attr(rec, NULL, le->type, le_name(le), le->name_len,
+ return mi_find_attr(ni, rec, NULL, le->type, le_name(le), le->name_len,
&le->id);
}
int mi_write(struct mft_inode *mi, int wait);
int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
__le16 flags, bool is_mft);
-struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
- const __le16 *name, u8 name_len, u32 asize,
- u16 name_off);
+struct ATTRIB *mi_insert_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+ enum ATTR_TYPE type, const __le16 *name,
+ u8 name_len, u32 asize, u16 name_off);
bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi,
struct ATTRIB *attr);
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
index 61d53d39f3b9..714c7ecedca8 100644
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -31,7 +31,7 @@ static inline int compare_attr(const struct ATTRIB *left, enum ATTR_TYPE type,
*
* Return: Unused attribute id that is less than mrec->next_attr_id.
*/
-static __le16 mi_new_attt_id(struct mft_inode *mi)
+static __le16 mi_new_attt_id(struct ntfs_inode *ni, struct mft_inode *mi)
{
u16 free_id, max_id, t16;
struct MFT_REC *rec = mi->mrec;
@@ -52,7 +52,7 @@ static __le16 mi_new_attt_id(struct mft_inode *mi)
attr = NULL;
for (;;) {
- attr = mi_enum_attr(mi, attr);
+ attr = mi_enum_attr(ni, mi, attr);
if (!attr) {
rec->next_attr_id = cpu_to_le16(max_id + 1);
mi->dirty = true;
@@ -195,7 +195,8 @@ out:
* NOTE: mi->mrec - memory of size sbi->record_size
* here we sure that mi->mrec->total == sbi->record_size (see mi_read)
*/
-struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
+struct ATTRIB *mi_enum_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+ struct ATTRIB *attr)
{
const struct MFT_REC *rec = mi->mrec;
u32 used = le32_to_cpu(rec->used);
@@ -209,11 +210,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
off = le16_to_cpu(rec->attr_off);
if (used > total)
- return NULL;
+ goto out;
if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 ||
!IS_ALIGNED(off, 8)) {
- return NULL;
+ goto out;
}
/* Skip non-resident records. */
@@ -243,7 +244,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
*/
if (off + 8 > used) {
static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8);
- return NULL;
+ goto out;
}
if (attr->type == ATTR_END) {
@@ -254,112 +255,116 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
/* 0x100 is last known attribute for now. */
t32 = le32_to_cpu(attr->type);
if (!t32 || (t32 & 0xf) || (t32 > 0x100))
- return NULL;
+ goto out;
/* attributes in record must be ordered by type */
if (t32 < prev_type)
- return NULL;
+ goto out;
asize = le32_to_cpu(attr->size);
if (!IS_ALIGNED(asize, 8))
- return NULL;
+ goto out;
/* Check overflow and boundary. */
if (off + asize < off || off + asize > used)
- return NULL;
+ goto out;
/* Can we use the field attr->non_res. */
if (off + 9 > used)
- return NULL;
+ goto out;
/* Check size of attribute. */
if (!attr->non_res) {
/* Check resident fields. */
if (asize < SIZEOF_RESIDENT)
- return NULL;
+ goto out;
t16 = le16_to_cpu(attr->res.data_off);
if (t16 > asize)
- return NULL;
+ goto out;
if (le32_to_cpu(attr->res.data_size) > asize - t16)
- return NULL;
+ goto out;
t32 = sizeof(short) * attr->name_len;
if (t32 && le16_to_cpu(attr->name_off) + t32 > t16)
- return NULL;
+ goto out;
return attr;
}
/* Check nonresident fields. */
if (attr->non_res != 1)
- return NULL;
+ goto out;
/* Can we use memory including attr->nres.valid_size? */
if (asize < SIZEOF_NONRESIDENT)
- return NULL;
+ goto out;
t16 = le16_to_cpu(attr->nres.run_off);
if (t16 > asize)
- return NULL;
+ goto out;
t32 = sizeof(short) * attr->name_len;
if (t32 && le16_to_cpu(attr->name_off) + t32 > t16)
- return NULL;
+ goto out;
/* Check start/end vcn. */
if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1)
- return NULL;
+ goto out;
data_size = le64_to_cpu(attr->nres.data_size);
if (le64_to_cpu(attr->nres.valid_size) > data_size)
- return NULL;
+ goto out;
alloc_size = le64_to_cpu(attr->nres.alloc_size);
if (data_size > alloc_size)
- return NULL;
+ goto out;
t32 = mi->sbi->cluster_mask;
if (alloc_size & t32)
- return NULL;
+ goto out;
if (!attr->nres.svcn && is_attr_ext(attr)) {
/* First segment of sparse/compressed attribute */
/* Can we use memory including attr->nres.total_size? */
if (asize < SIZEOF_NONRESIDENT_EX)
- return NULL;
+ goto out;
tot_size = le64_to_cpu(attr->nres.total_size);
if (tot_size & t32)
- return NULL;
+ goto out;
if (tot_size > alloc_size)
- return NULL;
+ goto out;
} else {
if (attr->nres.c_unit)
- return NULL;
+ goto out;
if (alloc_size > mi->sbi->volume.size)
- return NULL;
+ goto out;
}
return attr;
+
+out:
+ _ntfs_bad_inode(&ni->vfs_inode);
+ return NULL;
}
/*
* mi_find_attr - Find the attribute by type and name and id.
*/
-struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr,
- enum ATTR_TYPE type, const __le16 *name,
- u8 name_len, const __le16 *id)
+struct ATTRIB *mi_find_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+ struct ATTRIB *attr, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, const __le16 *id)
{
u32 type_in = le32_to_cpu(type);
u32 atype;
next_attr:
- attr = mi_enum_attr(mi, attr);
+ attr = mi_enum_attr(ni, mi, attr);
if (!attr)
return NULL;
@@ -467,9 +472,9 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
*
* Return: Not full constructed attribute or NULL if not possible to create.
*/
-struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
- const __le16 *name, u8 name_len, u32 asize,
- u16 name_off)
+struct ATTRIB *mi_insert_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+ enum ATTR_TYPE type, const __le16 *name,
+ u8 name_len, u32 asize, u16 name_off)
{
size_t tail;
struct ATTRIB *attr;
@@ -488,7 +493,7 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
* at which we should insert it.
*/
attr = NULL;
- while ((attr = mi_enum_attr(mi, attr))) {
+ while ((attr = mi_enum_attr(ni, mi, attr))) {
int diff = compare_attr(attr, type, name, name_len, upcase);
if (diff < 0)
@@ -508,7 +513,7 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
tail = used - PtrOffset(rec, attr);
}
- id = mi_new_attt_id(mi);
+ id = mi_new_attt_id(ni, mi);
memmove(Add2Ptr(attr, asize), attr, tail);
memset(attr, 0, asize);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index a9b8688aaf30..1873bbbb7e5b 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
}
-static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
int ret = 0; /* if all else fails, just return false */
@@ -44,8 +45,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
inode = d_inode(dentry);
osb = OCFS2_SB(dentry->d_sb);
- trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
- dentry->d_name.name);
+ trace_ocfs2_dentry_revalidate(dentry, name->len, name->name);
/* For a negative dentry -
* check the generation number of the parent and compare with the
@@ -53,12 +53,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
*/
if (inode == NULL) {
unsigned long gen = (unsigned long) dentry->d_fsdata;
- unsigned long pgen;
- spin_lock(&dentry->d_lock);
- pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
- spin_unlock(&dentry->d_lock);
- trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
- dentry->d_name.name,
+ unsigned long pgen = OCFS2_I(dir)->ip_dir_lock_gen;
+ trace_ocfs2_dentry_revalidate_negative(name->len, name->name,
pgen, gen);
if (gen != pgen)
goto bail;
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index 395a00ed8ac7..a19d1ad705db 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -13,10 +13,9 @@
#include "orangefs-kernel.h"
/* Returns 1 if dentry can still be trusted, else 0. */
-static int orangefs_revalidate_lookup(struct dentry *dentry)
+static int orangefs_revalidate_lookup(struct inode *parent_inode, const struct qstr *name,
+ struct dentry *dentry)
{
- struct dentry *parent_dentry = dget_parent(dentry);
- struct inode *parent_inode = parent_dentry->d_inode;
struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode);
struct inode *inode = dentry->d_inode;
struct orangefs_kernel_op_s *new_op;
@@ -26,14 +25,14 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
- if (!new_op) {
- ret = -ENOMEM;
- goto out_put_parent;
- }
+ if (!new_op)
+ return -ENOMEM;
new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
new_op->upcall.req.lookup.parent_refn = parent->refn;
- strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name);
+ /* op_alloc() leaves ->upcall zeroed */
+ memcpy(new_op->upcall.req.lookup.d_name, name->name,
+ min(name->len, ORANGEFS_NAME_MAX - 1));
gossip_debug(GOSSIP_DCACHE_DEBUG,
"%s:%s:%d interrupt flag [%d]\n",
@@ -78,8 +77,6 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
ret = 1;
out_release_op:
op_release(new_op);
-out_put_parent:
- dput(parent_dentry);
return ret;
out_drop:
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n",
@@ -92,7 +89,8 @@ out_drop:
*
* Should return 1 if dentry can still be trusted, else 0.
*/
-static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int orangefs_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
int ret;
unsigned long time = (unsigned long) dentry->d_fsdata;
@@ -114,7 +112,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
* If this passes, the positive dentry still exists or the negative
* dentry still does not exist.
*/
- if (!orangefs_revalidate_lookup(dentry))
+ if (!orangefs_revalidate_lookup(dir, name, dentry))
return 0;
/* We do not need to continue with negative dentries. */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fe511192f83c..86ae6f6da36b 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -91,7 +91,24 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
ret = d->d_op->d_weak_revalidate(d, flags);
} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
- ret = d->d_op->d_revalidate(d, flags);
+ struct dentry *parent;
+ struct inode *dir;
+ struct name_snapshot n;
+
+ if (flags & LOOKUP_RCU) {
+ parent = READ_ONCE(d->d_parent);
+ dir = d_inode_rcu(parent);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(d);
+ dir = d_inode(parent);
+ }
+ take_dentry_name_snapshot(&n, d);
+ ret = d->d_op->d_revalidate(dir, &n.name, d, flags);
+ release_dentry_name_snapshot(&n);
+ if (!(flags & LOOKUP_RCU))
+ dput(parent);
if (!ret) {
if (!(flags & LOOKUP_RCU))
d_invalidate(d);
@@ -127,7 +144,8 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry,
return ret;
}
-static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
return ovl_dentry_revalidate_common(dentry, flags, false);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a50b222a5917..cd89e956c322 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2058,7 +2058,8 @@ void pid_update_inode(struct task_struct *task, struct inode *inode)
* performed a setuid(), etc.
*
*/
-static int pid_revalidate(struct dentry *dentry, unsigned int flags)
+static int pid_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct task_struct *task;
@@ -2191,7 +2192,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
return 0;
}
-static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int map_files_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
unsigned long vm_start, vm_end;
bool exact_vma_exists = false;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 24baf23e864f..37aa778d1af7 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -140,7 +140,8 @@ static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
security_task_to_inode(task, inode);
}
-static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
+static int tid_fd_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct task_struct *task;
struct inode *inode;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dbe82cf23ee4..8ec90826a49e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -216,7 +216,8 @@ void proc_free_inum(unsigned int inum)
ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
}
-static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int proc_misc_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -343,7 +344,8 @@ static const struct file_operations proc_dir_operations = {
.iterate_shared = proc_readdir,
};
-static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int proc_net_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
return 0;
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 27a283d85a6e..cc9d74a06ff0 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -884,7 +884,8 @@ static const struct inode_operations proc_sys_dir_operations = {
.getattr = proc_sys_getattr,
};
-static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
+static int proc_sys_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 1822493dd084..d1e95632ac54 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -737,7 +737,8 @@ again:
}
static int
-cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
+cifs_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *direntry, unsigned int flags)
{
struct inode *inode;
int rc;
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index cfc614c638da..53214499e384 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -457,7 +457,8 @@ static void tracefs_d_release(struct dentry *dentry)
eventfs_d_release(dentry);
}
-static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct eventfs_inode *ei = dentry->d_fsdata;
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index 5f1a14d5b927..a859ac9b74ba 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -192,7 +192,8 @@ const struct file_operations vboxsf_dir_fops = {
* This is called during name resolution/lookup to check if the @dentry in
* the cache is still valid. the job is handled by vboxsf_inode_revalidate.
*/
-static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int vboxsf_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bff956f7b2b9..9a1a30857763 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -68,16 +68,24 @@ extern const struct qstr dotdot_name;
* large memory footprint increase).
*/
#ifdef CONFIG_64BIT
-# define DNAME_INLINE_LEN 40 /* 192 bytes */
+# define DNAME_INLINE_WORDS 5 /* 192 bytes */
#else
# ifdef CONFIG_SMP
-# define DNAME_INLINE_LEN 36 /* 128 bytes */
+# define DNAME_INLINE_WORDS 9 /* 128 bytes */
# else
-# define DNAME_INLINE_LEN 44 /* 128 bytes */
+# define DNAME_INLINE_WORDS 11 /* 128 bytes */
# endif
#endif
+#define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long))
+
+union shortname_store {
+ unsigned char string[DNAME_INLINE_LEN];
+ unsigned long words[DNAME_INLINE_WORDS];
+};
+
#define d_lock d_lockref.lock
+#define d_iname d_shortname.string
struct dentry {
/* RCU lookup touched fields */
@@ -88,7 +96,7 @@ struct dentry {
struct qstr d_name;
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
- unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+ union shortname_store d_shortname;
/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
/* Ref lookup also touches following */
@@ -136,7 +144,8 @@ enum d_real_type {
};
struct dentry_operations {
- int (*d_revalidate)(struct dentry *, unsigned int);
+ int (*d_revalidate)(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
int (*d_hash)(const struct dentry *, struct qstr *);
int (*d_compare)(const struct dentry *,
@@ -150,6 +159,8 @@ struct dentry_operations {
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
+ bool (*d_unalias_trylock)(const struct dentry *);
+ void (*d_unalias_unlock)(const struct dentry *);
} ____cacheline_aligned;
/*
@@ -589,7 +600,7 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
struct name_snapshot {
struct qstr name;
- unsigned char inline_name[DNAME_INLINE_LEN];
+ union shortname_store inline_name;
};
void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
void release_dentry_name_snapshot(struct name_snapshot *);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 772f822dc6b8..18855cb44b1c 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -192,7 +192,8 @@ struct fscrypt_operations {
unsigned int *num_devs);
};
-int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
+int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags);
static inline struct fscrypt_inode_info *
fscrypt_get_inode_info(const struct inode *inode)
@@ -711,8 +712,8 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir,
return 0;
}
-static inline int fscrypt_d_revalidate(struct dentry *dentry,
- unsigned int flags)
+static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
return 1;
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8da4c61f97b9..2a59034a5fa2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1085,8 +1085,8 @@ struct netdev_net_notifier {
*
* int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
* Old-style ioctl entry point. This is used internally by the
- * appletalk and ieee802154 subsystems but is no longer called by
- * the device ioctl handler.
+ * ieee802154 subsystem but is no longer called by the device
+ * ioctl handler.
*
* int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd);
* Used by the bonding driver for its device specific ioctls:
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 162b7c0c3555..9155a6ffc370 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1781,7 +1781,7 @@ struct nfs_rpc_ops {
struct nfs_fattr *, struct inode *);
int (*setattr) (struct dentry *, struct nfs_fattr *,
struct iattr *);
- int (*lookup) (struct inode *, struct dentry *,
+ int (*lookup) (struct inode *, struct dentry *, const struct qstr *,
struct nfs_fh *, struct nfs_fattr *);
int (*lookupp) (struct inode *, struct nfs_fh *,
struct nfs_fattr *);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 08c37b83fea8..78855d794342 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \
#ifdef CONFIG_PM
#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns)
-#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns")
#else
#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns)
-#define EXPORT_PM_FN_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns)
#endif
#ifdef CONFIG_PM_SLEEP
@@ -684,6 +680,7 @@ struct dev_pm_info {
bool no_pm_callbacks:1; /* Owned by the PM core */
bool async_in_progress:1; /* Owned by the PM core */
bool must_resume:1; /* Owned by the PM core */
+ bool set_active:1; /* Owned by the PM core */
bool may_skip_resume:1; /* Set by subsystems */
#else
bool should_wakeup:1;
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index ed4cd114180a..7f405672b089 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -237,7 +237,6 @@ struct page_pool {
struct {
struct hlist_node list;
u64 detach_time;
- u32 napi_id;
u32 id;
} user;
};
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4b0677e48190..ed4b83696c77 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1268,9 +1268,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (xo) {
x = xfrm_input_state(skb);
- if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
- return (xo->flags & CRYPTO_DONE) &&
- (xo->status & CRYPTO_SUCCESS);
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+ bool check = (xo->flags & CRYPTO_DONE) &&
+ (xo->status & CRYPTO_SUCCESS);
+
+ /* The packets here are plain ones and secpath was
+ * needed to indicate that hardware already handled
+ * them and there is no need to do nothing in addition.
+ *
+ * Consume secpath which was set by drivers.
+ */
+ secpath_reset(skb);
+ return check;
+ }
}
return __xfrm_check_nopolicy(net, skb, dir) ||
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1f87aa01ba44..10a01af63a80 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -608,7 +608,11 @@ int hibernation_platform_enter(void)
local_irq_disable();
system_state = SYSTEM_SUSPEND;
- syscore_suspend();
+
+ error = syscore_suspend();
+ if (error)
+ goto Enable_irqs;
+
if (pm_wakeup_pending()) {
error = -EAGAIN;
goto Power_up;
@@ -620,6 +624,7 @@ int hibernation_platform_enter(void)
Power_up:
syscore_resume();
+ Enable_irqs:
system_state = SYSTEM_RUNNING;
local_irq_enable();
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a2a29e3fffca..1a19d69b91ed 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -666,7 +666,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
}
sg_policy->thread = thread;
- kthread_bind_mask(thread, policy->related_cpus);
+ if (policy->dvfs_possible_from_any_cpu)
+ set_cpus_allowed_ptr(thread, policy->related_cpus);
+ else
+ kthread_bind_mask(thread, policy->related_cpus);
+
init_irq_work(&sg_policy->irq_work, sugov_irq_work);
mutex_init(&sg_policy->work_lock);
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 149e2c8036d3..456d339be98f 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1130,6 +1130,13 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
return 0;
/*
+ * The special/sugov task isn't part of regular bandwidth/admission
+ * control so let userspace change affinities.
+ */
+ if (dl_entity_is_special(&p->dl))
+ return 0;
+
+ /*
* Since bandwidth control happens on root_domain basis,
* if admission test is enabled, we only admit -deadline
* tasks allowed to run on all the CPUs in the task's
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 49f97d4138ea..46ea0bee2259 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -710,12 +710,12 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu)
{
switch (chan->scid) {
case L2CAP_CID_ATT:
- if (mtu < L2CAP_LE_MIN_MTU)
+ if (mtu && mtu < L2CAP_LE_MIN_MTU)
return false;
break;
default:
- if (mtu < L2CAP_DEFAULT_MIN_MTU)
+ if (mtu && mtu < L2CAP_DEFAULT_MIN_MTU)
return false;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index afa2282f2604..c0021cbd28fc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6708,7 +6708,7 @@ void napi_resume_irqs(unsigned int napi_id)
static void __napi_hash_add_with_id(struct napi_struct *napi,
unsigned int napi_id)
{
- napi->napi_id = napi_id;
+ WRITE_ONCE(napi->napi_id, napi_id);
hlist_add_head_rcu(&napi->napi_hash_node,
&napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
}
@@ -9924,6 +9924,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
NL_SET_ERR_MSG(extack, "Program bound to different device");
return -EINVAL;
}
+ if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) {
+ NL_SET_ERR_MSG(extack, "Can't attach device-bound programs in generic mode");
+ return -EINVAL;
+ }
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
return -EINVAL;
@@ -10260,37 +10264,14 @@ static bool from_cleanup_net(void)
#endif
}
-static void rtnl_drop_if_cleanup_net(void)
-{
- if (from_cleanup_net())
- __rtnl_unlock();
-}
-
-static void rtnl_acquire_if_cleanup_net(void)
-{
- if (from_cleanup_net())
- rtnl_lock();
-}
-
/* Delayed registration/unregisteration */
LIST_HEAD(net_todo_list);
-static LIST_HEAD(net_todo_list_for_cleanup_net);
-
-/* TODO: net_todo_list/net_todo_list_for_cleanup_net should probably
- * be provided by callers, instead of being static, rtnl protected.
- */
-static struct list_head *todo_list(void)
-{
- return from_cleanup_net() ? &net_todo_list_for_cleanup_net :
- &net_todo_list;
-}
-
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
atomic_t dev_unreg_count = ATOMIC_INIT(0);
static void net_set_todo(struct net_device *dev)
{
- list_add_tail(&dev->todo_list, todo_list());
+ list_add_tail(&dev->todo_list, &net_todo_list);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -11140,7 +11121,7 @@ void netdev_run_todo(void)
#endif
/* Snapshot list, allow later requests */
- list_replace_init(todo_list(), &list);
+ list_replace_init(&net_todo_list, &list);
__rtnl_unlock();
@@ -11785,11 +11766,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
netdev_unlock(dev);
}
-
- rtnl_drop_if_cleanup_net();
flush_all_backlogs();
+
synchronize_net();
- rtnl_acquire_if_cleanup_net();
list_for_each_entry(dev, head, unreg_list) {
struct sk_buff *skb = NULL;
@@ -11849,9 +11828,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
#endif
}
- rtnl_drop_if_cleanup_net();
synchronize_net();
- rtnl_acquire_if_cleanup_net();
list_for_each_entry(dev, head, unreg_list) {
netdev_put(dev, &dev->dev_registered_tracker);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1906c62dee85..f5e908c9e7ad 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -1146,7 +1146,9 @@ void page_pool_disable_direct_recycling(struct page_pool *pool)
WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state));
WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1);
+ mutex_lock(&page_pools_lock);
WRITE_ONCE(pool->p.napi, NULL);
+ mutex_unlock(&page_pools_lock);
}
EXPORT_SYMBOL(page_pool_disable_direct_recycling);
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
index 57439787b9c2..2fb06d5f6d55 100644
--- a/net/core/page_pool_priv.h
+++ b/net/core/page_pool_priv.h
@@ -7,6 +7,8 @@
#include "netmem_priv.h"
+extern struct mutex page_pools_lock;
+
s32 page_pool_inflight(const struct page_pool *pool, bool strict);
int page_pool_list(struct page_pool *pool);
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 48335766c1bf..6677e0c2e256 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -3,6 +3,7 @@
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/xarray.h>
+#include <net/busy_poll.h>
#include <net/net_debug.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
@@ -14,10 +15,11 @@
#include "netdev-genl-gen.h"
static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);
-/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user.
+/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev,
+ * pool->user.
* Ordering: inside rtnl_lock
*/
-static DEFINE_MUTEX(page_pools_lock);
+DEFINE_MUTEX(page_pools_lock);
/* Page pools are only reachable from user space (via netlink) if they are
* linked to a netdev at creation time. Following page pool "visibility"
@@ -216,6 +218,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
{
struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
+ unsigned int napi_id;
void *hdr;
hdr = genlmsg_iput(rsp, info);
@@ -229,8 +232,10 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
pool->slow.netdev->ifindex))
goto err_cancel;
- if (pool->user.napi_id &&
- nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id))
+
+ napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0;
+ if (napi_id >= MIN_NAPI_ID &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id))
goto err_cancel;
inflight = page_pool_inflight(pool, false);
@@ -319,8 +324,6 @@ int page_pool_list(struct page_pool *pool)
if (pool->slow.netdev) {
hlist_add_head(&pool->user.list,
&pool->slow.netdev->page_pools);
- pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0;
-
netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
}
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 7bb94875a7ec..34bee42e1247 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -998,7 +998,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
ethtool_get_flow_spec_ring(info.fs.ring_cookie))
return -EINVAL;
- if (ops->get_rxfh) {
+ if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) {
struct ethtool_rxfh_param rxfh = {};
rc = ops->get_rxfh(dev, &rxfh);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 87bb3a91598e..a4bacf198555 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -700,9 +700,12 @@ static int fill_frame_info(struct hsr_frame_info *frame,
frame->is_vlan = true;
if (frame->is_vlan) {
- if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr))
+ /* Note: skb->mac_len might be wrong here. */
+ if (!pskb_may_pull(skb,
+ skb_mac_offset(skb) +
+ offsetofend(struct hsr_vlan_ethhdr, vlanhdr)))
return -EINVAL;
- vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
+ vlan_hdr = (struct hsr_vlan_ethhdr *)skb_mac_header(skb);
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b0fbf804bbba..0e4076866c0a 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -279,7 +279,7 @@ static void esp_output_done(void *data, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
}
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 03b6eee407a2..28d77d454d44 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -330,9 +330,6 @@ next_entry:
list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
if (e < s_e)
goto next_entry2;
- if (filter->dev &&
- !mr_mfc_uses_dev(mrt, mfc, filter->dev))
- goto next_entry2;
err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e5b9a654254..bc95d2a5924f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -265,11 +265,14 @@ static u16 tcp_select_window(struct sock *sk)
u32 cur_win, new_win;
/* Make the window 0 if we failed to queue the data because we
- * are out of memory. The window is temporary, so we don't store
- * it on the socket.
+ * are out of memory.
*/
- if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM))
+ if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) {
+ tp->pred_flags = 0;
+ tp->rcv_wnd = 0;
+ tp->rcv_wup = tp->rcv_nxt;
return 0;
+ }
cur_win = tcp_receive_window(tp);
new_win = __tcp_select_window(sk);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 5f3d0cc1555a..9e73944e3b53 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -315,7 +315,7 @@ static void esp_output_done(void *data, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5f7b1fdbffe6..b3d5d1f266ee 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
toobig = skb->len > mtu && !skb_is_gso(skb);
- if (toobig && xfrm6_local_dontfrag(skb->sk)) {
+ if (toobig && xfrm6_local_dontfrag(sk)) {
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
} else if (toobig && xfrm6_noneed_fragment(skb)) {
skb->ignore_df = 1;
goto skip_frag;
- } else if (!skb->ignore_df && toobig && skb->sk) {
+ } else if (!skb->ignore_df && toobig && sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 3999e0ba2c35..2dd81e6c26bd 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -418,9 +418,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP);
subflow->mpc_drop = 1;
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
- } else {
- subflow->mpc_drop = 0;
}
+ } else if (ssk->sk_state == TCP_SYN_SENT) {
+ subflow->mpc_drop = 0;
}
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 123f3f297284..fd2de185bc93 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
- mp_opt->use_ack = 0;
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
@@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("DSS\n");
ptr++;
- /* we must clear 'mpc_map' be able to detect MP_CAPABLE
- * map vs DSS map in mptcp_incoming_options(), and reconstruct
- * map info accordingly
- */
- mp_opt->mpc_map = 0;
flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
@@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb,
const unsigned char *ptr;
int length;
- /* initialize option status */
- mp_opt->suboptions = 0;
+ /* Ensure that casting the whole status to u32 is efficient and safe */
+ BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status),
+ sizeof(u32)));
+ *(u32 *)&mp_opt->status = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 98ac73938bd8..572d160edca3 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -2020,7 +2020,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
+ MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
spin_unlock_bh(&pernet->lock);
GENL_SET_ERR_MSG(info, "invalid addr flags");
return -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c44c89ecaca6..6bd819047470 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1767,8 +1767,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* see mptcp_disconnect().
* Attempt it again outside the problematic scope.
*/
- if (!mptcp_disconnect(sk, 0))
+ if (!mptcp_disconnect(sk, 0)) {
+ sk->sk_disconnects++;
sk->sk_socket->state = SS_UNCONNECTED;
+ }
}
inet_clear_bit(DEFER_CONNECT, sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0174a5aad279..f6a207958459 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -149,22 +149,24 @@ struct mptcp_options_received {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u16 suboptions;
+ struct_group(status,
+ u16 suboptions;
+ u16 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ mpc_map:1,
+ reset_reason:4,
+ reset_transient:1,
+ echo:1,
+ backup:1,
+ deny_join_id0:1,
+ __unused:2;
+ );
+ u8 join_id;
u32 token;
u32 nonce;
- u16 use_map:1,
- dsn64:1,
- data_fin:1,
- use_ack:1,
- ack64:1,
- mpc_map:1,
- reset_reason:4,
- reset_transient:1,
- echo:1,
- backup:1,
- deny_join_id0:1,
- __unused:2;
- u8 join_id;
u64 thmac;
u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index bf276eaf9330..7891a537bddd 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1385,6 +1385,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_package;
break;
case ncsi_dev_state_probe_package:
+ if (ndp->package_probe_id >= 8) {
+ /* Last package probed, finishing */
+ ndp->flags |= NCSI_DEV_PROBED;
+ break;
+ }
+
ndp->pending_req_num = 1;
nca.type = NCSI_PKT_CMD_SP;
@@ -1501,13 +1507,8 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
if (ret)
goto error;
- /* Probe next package */
+ /* Probe next package after receiving response */
ndp->package_probe_id++;
- if (ndp->package_probe_id >= 8) {
- /* Probe finished */
- ndp->flags |= NCSI_DEV_PROBED;
- break;
- }
nd->state = ncsi_dev_state_probe_package;
ndp->active_package = NULL;
break;
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 14bd66909ca4..4a8ce2949fae 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1089,14 +1089,12 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
{
struct ncsi_dev_priv *ndp = nr->ndp;
+ struct sockaddr *saddr = &ndp->pending_mac;
struct net_device *ndev = ndp->ndev.dev;
struct ncsi_rsp_gmcma_pkt *rsp;
- struct sockaddr saddr;
- int ret = -1;
int i;
rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp);
- saddr.sa_family = ndev->type;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n",
@@ -1108,20 +1106,20 @@ static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
rsp->addresses[i][4], rsp->addresses[i][5]);
}
+ saddr->sa_family = ndev->type;
for (i = 0; i < rsp->address_count; i++) {
- memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN);
- ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr);
- if (ret < 0) {
+ if (!is_valid_ether_addr(rsp->addresses[i])) {
netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n",
- saddr.sa_data);
+ rsp->addresses[i]);
continue;
}
- netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data);
+ memcpy(saddr->sa_data, rsp->addresses[i], ETH_ALEN);
+ netdev_warn(ndev, "NCSI: Will set MAC address to %pM\n", saddr->sa_data);
break;
}
- ndp->gma_flag = ret == 0;
- return ret;
+ ndp->gma_flag = 1;
+ return 0;
}
static struct ncsi_rsp_handler {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 667459256e4c..a34de9c17cf1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5078,7 +5078,7 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
static int nft_set_desc_concat(struct nft_set_desc *desc,
const struct nlattr *nla)
{
- u32 num_regs = 0, key_num_regs = 0;
+ u32 len = 0, num_regs;
struct nlattr *attr;
int rem, err, i;
@@ -5092,12 +5092,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
}
for (i = 0; i < desc->field_count; i++)
- num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+ len += round_up(desc->field_len[i], sizeof(u32));
- key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
- if (key_num_regs != num_regs)
+ if (len != desc->klen)
return -EINVAL;
+ num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
if (num_regs > NFT_REG32_COUNT)
return -E2BIG;
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index de175318a3a0..082ab66f120b 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -542,6 +542,8 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
pr_debug("pipe created=%d\n", pipe);
+ if (pipe >= NCI_HCI_MAX_PIPES)
+ pipe = NCI_HCI_INVALID_PIPE;
return pipe;
}
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index f06ddbed3fed..1525773e94aa 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -122,6 +122,10 @@ static void rose_heartbeat_expiry(struct timer_list *t)
struct rose_sock *rose = rose_sk(sk);
bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20);
+ goto out;
+ }
switch (rose->state) {
case ROSE_STATE_0:
/* Magic here: If we listen() and a new link dies before it
@@ -152,6 +156,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
}
rose_start_heartbeat(sk);
+out:
bh_unlock_sock(sk);
sock_put(sk);
}
@@ -162,6 +167,10 @@ static void rose_timer_expiry(struct timer_list *t)
struct sock *sk = &rose->sock;
bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &rose->timer, jiffies + HZ/20);
+ goto out;
+ }
switch (rose->state) {
case ROSE_STATE_1: /* T1 */
case ROSE_STATE_4: /* T2 */
@@ -182,6 +191,7 @@ static void rose_timer_expiry(struct timer_list *t)
}
break;
}
+out:
bh_unlock_sock(sk);
sock_put(sk);
}
@@ -192,6 +202,10 @@ static void rose_idletimer_expiry(struct timer_list *t)
struct sock *sk = &rose->sock;
bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20);
+ goto out;
+ }
rose_clear_queues(sk);
rose_write_internal(sk, ROSE_CLEAR_REQUEST);
@@ -207,6 +221,7 @@ static void rose_idletimer_expiry(struct timer_list *t)
sk->sk_state_change(sk);
sock_set_flag(sk, SOCK_DEAD);
}
+out:
bh_unlock_sock(sk);
sock_put(sk);
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index d82e44a3901b..e874c31fa901 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -246,7 +246,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
bool use;
int slot;
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
while (!list_empty(collector)) {
peer = list_entry(collector->next,
@@ -257,7 +257,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
continue;
use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
if (use) {
keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
@@ -277,17 +277,17 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
*/
slot += cursor;
slot &= mask;
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
list_add_tail(&peer->keepalive_link,
&rxnet->peer_keepalive[slot & mask]);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
}
rxrpc_put_peer(peer, rxrpc_peer_put_keepalive);
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
}
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
}
/*
@@ -317,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
* second; the bucket at cursor + 1 goes at now + 1s and so
* on...
*/
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
list_splice_init(&rxnet->peer_keepalive_new, &collector);
stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
@@ -329,7 +329,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
}
base = now;
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
rxnet->peer_keepalive_base = base;
rxnet->peer_keepalive_cursor = cursor;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index e1c63129586b..0fcc87f0409f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -325,10 +325,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
rxrpc_init_peer(local, peer, hash_key);
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
}
/*
@@ -360,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
return NULL;
}
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
@@ -373,7 +373,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
&rxnet->peer_keepalive_new);
}
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
if (peer)
rxrpc_free_peer(candidate);
@@ -423,10 +423,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
ASSERT(hlist_empty(&peer->error_targets));
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc_free_peer(peer);
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index f80bc05d4c5a..516038a44163 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
{
struct ets_sched *q = qdisc_priv(sch);
+ if (arg == 0 || arg > q->nbands)
+ return NULL;
return &q->classes[arg - 1];
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index fa9d1b49599b..075695173648 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
void vsock_remove_sock(struct vsock_sock *vsk)
{
- vsock_remove_bound(vsk);
+ /* Transport reassignment must not remove the binding. */
+ if (sock_flag(sk_vsock(vsk), SOCK_DEAD))
+ vsock_remove_bound(vsk);
+
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
@@ -821,12 +824,13 @@ static void __vsock_release(struct sock *sk, int level)
*/
lock_sock_nested(sk, level);
+ sock_orphan(sk);
+
if (vsk->transport)
vsk->transport->release(vsk);
else if (sock_type_connectible(sk->sk_type))
vsock_remove_sock(vsk);
- sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
skb_queue_purge(&sk->sk_receive_queue);
@@ -1519,6 +1523,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
if (err < 0)
goto out;
+ /* sk_err might have been set as a result of an earlier
+ * (failed) connect attempt.
+ */
+ sk->sk_err = 0;
+
/* Mark sock as connecting and set the error code to in
* progress in case this is a non-blocking connect.
*/
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 98f1e2b67c76..c397eb99d867 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -506,7 +506,7 @@ xmit:
skb_dst_set(skb, dst);
skb->dev = tdev;
- err = dst_output(xi->net, skb->sk, skb);
+ err = dst_output(xi->net, skb_to_full_sk(skb), skb);
if (net_xmit_eval(err) == 0) {
dev_sw_netstats_tx_add(dev, 1, length);
} else {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index b5025cf6136e..f7abd42c077d 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -802,7 +802,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
!skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
skb->protocol = htons(ETH_P_IP);
- if (skb->sk)
+ if (skb->sk && sk_fullsock(skb->sk))
xfrm_local_error(skb, mtu);
else
icmp_send(skb, ICMP_DEST_UNREACH,
@@ -838,6 +838,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
struct dst_entry *dst = skb_dst(skb);
+ struct sock *sk = skb_to_full_sk(skb);
if (skb->ignore_df)
goto out;
@@ -852,9 +853,9 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
- if (xfrm6_local_dontfrag(skb->sk))
+ if (xfrm6_local_dontfrag(sk))
ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
- else if (skb->sk)
+ else if (sk)
xfrm_local_error(skb, mtu);
else
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9e510021ee91..6551e588fe52 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2964,7 +2964,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- dst_output(net, skb->sk, skb);
+ dst_output(net, skb_to_full_sk(skb), skb);
}
out:
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index e500aebbad22..dbdf8a39dffe 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -714,10 +714,12 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
oseq += skb_shinfo(skb)->gso_segs;
}
- if (unlikely(xo->seq.low < replay_esn->oseq)) {
- XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
- xo->seq.hi = oseq_hi;
- replay_esn->oseq_hi = oseq_hi;
+ if (unlikely(oseq < replay_esn->oseq)) {
+ replay_esn->oseq_hi = ++oseq_hi;
+ if (xo->seq.low < replay_esn->oseq) {
+ XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
+ xo->seq.hi = oseq_hi;
+ }
if (replay_esn->oseq_hi == 0) {
replay_esn->oseq--;
replay_esn->oseq_hi--;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 34067cb8a479..ad2202fa82f3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -34,6 +34,8 @@
#define xfrm_state_deref_prot(table, net) \
rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+#define xfrm_state_deref_check(table, net) \
+ rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
static void xfrm_state_gc_task(struct work_struct *work);
@@ -62,6 +64,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
u32 reqid,
unsigned short family)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
}
@@ -70,6 +74,8 @@ static inline unsigned int xfrm_src_hash(struct net *net,
const xfrm_address_t *saddr,
unsigned short family)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
}
@@ -77,11 +83,15 @@ static inline unsigned int
xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
__be32 spi, u8 proto, unsigned short family)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
}
static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
}
@@ -1108,16 +1118,38 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
x->props.family = tmpl->encap_family;
}
-static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
+struct xfrm_hash_state_ptrs {
+ const struct hlist_head *bydst;
+ const struct hlist_head *bysrc;
+ const struct hlist_head *byspi;
+ unsigned int hmask;
+};
+
+static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs)
+{
+ unsigned int sequence;
+
+ do {
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
+
+ ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net);
+ ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net);
+ ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net);
+ ptrs->hmask = net->xfrm.state_hmask;
+ } while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence));
+}
+
+static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs,
+ u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family,
struct xfrm_dev_offload *xdo)
{
- unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+ unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+ hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
#ifdef CONFIG_XFRM_OFFLOAD
if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1151,15 +1183,16 @@ static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
return NULL;
}
-static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
+static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs,
+ u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family)
{
- unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+ unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+ hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
@@ -1181,11 +1214,11 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
__be32 spi, u8 proto,
unsigned short family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct hlist_head *state_cache_input;
struct xfrm_state *x = NULL;
- int cpu = get_cpu();
- state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+ state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input);
rcu_read_lock();
hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
@@ -1202,7 +1235,9 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
goto out;
}
- x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
if (x && x->km.state == XFRM_STATE_VALID) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -1217,20 +1252,20 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
out:
rcu_read_unlock();
- put_cpu();
return x;
}
EXPORT_SYMBOL(xfrm_input_state_lookup);
-static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
+static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs,
+ u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
u8 proto, unsigned short family)
{
- unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
+ unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
+ hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) {
if (x->props.family != family ||
x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -1250,14 +1285,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct net *net = xs_net(x);
u32 mark = x->mark.v & x->mark.m;
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
if (use_spi)
- return __xfrm_state_lookup(net, mark, &x->id.daddr,
+ return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr,
x->id.spi, x->id.proto, family);
else
- return __xfrm_state_lookup_byaddr(net, mark,
+ return __xfrm_state_lookup_byaddr(&state_ptrs, mark,
&x->id.daddr,
&x->props.saddr,
x->id.proto, family);
@@ -1331,6 +1369,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short family, u32 if_id)
{
static xfrm_address_t saddr_wildcard = { };
+ struct xfrm_hash_state_ptrs state_ptrs;
struct net *net = xp_net(pol);
unsigned int h, h_wildcard;
struct xfrm_state *x, *x0, *to_put;
@@ -1395,8 +1434,10 @@ cached:
else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
WARN_ON(1);
- h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
- hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
+ hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1429,8 +1470,9 @@ cached:
if (best || acquire_in_progress)
goto found;
- h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
- hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
+ h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid,
+ encap_family, state_ptrs.hmask);
+ hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1468,7 +1510,7 @@ found:
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
- (x0 = __xfrm_state_lookup_all(net, mark, daddr,
+ (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr,
tmpl->id.spi, tmpl->id.proto,
encap_family,
&pol->xdo)) != NULL) {
@@ -2253,10 +2295,13 @@ struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct xfrm_state *x;
rcu_read_lock();
- x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
rcu_read_unlock();
return x;
}
@@ -2267,10 +2312,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
u8 proto, unsigned short family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
+
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
}
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index e16cef160bc2..ce32cb35007d 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
ynl_attr_for_each_payload(start, data_len, attr) {
astart_off = (char *)attr - (char *)start;
- aend_off = astart_off + ynl_attr_data_len(attr);
+ aend_off = (char *)ynl_attr_data_end(attr) - (char *)start;
if (aend_off <= off)
continue;
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
index 38034fb82530..02b82774469c 100644
--- a/tools/testing/selftests/bpf/progs/find_vma.c
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
{
if (vma->vm_file)
bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
- vma->vm_file->f_path.dentry->d_iname);
+ vma->vm_file->f_path.dentry->d_shortname.string);
/* check for VM_EXEC */
if (vma->vm_flags & VM_EXEC)
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 384cfa3d38a6..92c2f0376c08 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -142,7 +142,7 @@ function pre_ethtool {
}
function check_table {
- local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
local -n expected=$2
local last=$3
@@ -212,7 +212,7 @@ function check_tables {
}
function print_table {
- local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
read -a have < $path
tree $NSIM_DEV_DFS/
@@ -641,7 +641,7 @@ for port in 0 1; do
NSIM_NETDEV=`get_netdev_name old_netdevs`
ip link set dev $NSIM_NETDEV up
- echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
msg="1 - create VxLANs v6"
exp0=( 0 0 0 0 )
@@ -663,7 +663,7 @@ for port in 0 1; do
new_geneve gnv0 20000
msg="2 - destroy GENEVE"
- echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
exp1=( `mke 20000 2` 0 0 0 )
del_dev gnv0
@@ -764,7 +764,7 @@ for port in 0 1; do
msg="create VxLANs v4"
new_vxlan vxlan0 10000 $NSIM_NETDEV
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="NIC device goes down"
@@ -775,7 +775,7 @@ for port in 0 1; do
fi
check_tables
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="NIC device goes up again"
@@ -789,7 +789,7 @@ for port in 0 1; do
del_dev vxlan0
check_tables
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="destroy NIC"
@@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table"
exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
del_dev vxlan2
-echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="tunnels destroyed 2"
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
index 6fb66a687f17..bbc29ed9c60a 100755
--- a/tools/testing/selftests/gpio/gpio-sim.sh
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -46,12 +46,6 @@ remove_chip() {
rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip"
}
-configfs_cleanup() {
- for CHIP in `ls $CONFIGFS_DIR/`; do
- remove_chip $CHIP
- done
-}
-
create_chip() {
local CHIP=$1
@@ -105,6 +99,13 @@ disable_chip() {
echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip"
}
+configfs_cleanup() {
+ for CHIP in `ls $CONFIGFS_DIR/`; do
+ disable_chip $CHIP
+ remove_chip $CHIP
+ done
+}
+
configfs_chip_name() {
local CHIP=$1
local BANK=$2
@@ -181,6 +182,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work"
+disable_chip chip
remove_chip chip
echo "1.2. chip_name returns 'none' if the chip is still pending"
@@ -195,6 +197,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work"
+disable_chip chip
remove_chip chip
echo "2. Creating and configuring simulated chips"
@@ -204,6 +207,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1"
+disable_chip chip
remove_chip chip
echo "2.2. Number of lines can be specified"
@@ -212,6 +216,7 @@ create_bank chip bank
set_num_lines chip bank 16
enable_chip chip
test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16"
+disable_chip chip
remove_chip chip
echo "2.3. Label can be set"
@@ -220,6 +225,7 @@ create_bank chip bank
set_label chip bank foobar
enable_chip chip
test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect"
+disable_chip chip
remove_chip chip
echo "2.4. Label can be left empty"
@@ -227,6 +233,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty"
+disable_chip chip
remove_chip chip
echo "2.5. Line names can be configured"
@@ -238,6 +245,7 @@ set_line_name chip bank 2 bar
enable_chip chip
test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect"
test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect"
+disable_chip chip
remove_chip chip
echo "2.6. Line config can remain unused if offset is greater than number of lines"
@@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar
enable_chip chip
test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect"
test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect"
+disable_chip chip
remove_chip chip
echo "2.7. Line configfs directory names are sanitized"
@@ -267,6 +276,7 @@ for CHIP in $CHIPS; do
enable_chip $CHIP
done
for CHIP in $CHIPS; do
+ disable_chip $CHIP
remove_chip $CHIP
done
@@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \
fail "Setting label of a live chip should fail"
echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \
fail "Setting number of lines of a live chip should fail"
+disable_chip chip
remove_chip chip
echo "2.10. Can't create line items when chip is live"
@@ -285,6 +296,7 @@ create_chip chip
create_bank chip bank
enable_chip chip
mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail"
+disable_chip chip
remove_chip chip
echo "2.11. Probe errors are propagated to user-space"
@@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog
enable_chip chip
$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \
fail "Setting the value of a hogged line shouldn't succeed"
+disable_chip chip
remove_chip chip
echo "3. Controlling simulated chips"
@@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly"
sysfs_set_pull chip bank 0 pull-down
$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1
test "$?" = "0" || fail "pull set incorrectly"
+disable_chip chip
remove_chip chip
echo "3.2. Pull can be read from sysfs"
@@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull
test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed"
sysfs_set_pull chip bank 0 pull-up
test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed"
+disable_chip chip
remove_chip chip
echo "3.3. Incorrect input in sysfs is rejected"
@@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip`
CHIPNAME=`configfs_chip_name chip bank`
SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull"
echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected"
+disable_chip chip
remove_chip chip
echo "3.4. Can't write to value"
@@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip`
CHIPNAME=`configfs_chip_name chip bank`
SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly"
+disable_chip chip
remove_chip chip
echo "4. Simulated GPIO chips are functional"
@@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 &
sleep 0.1 # FIXME Any better way?
test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs"
kill $!
+disable_chip chip
remove_chip chip
echo "4.2. Bias settings work correctly"
@@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank`
SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
+disable_chip chip
remove_chip chip
echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index d10f420e4ef6..fd0d959914e4 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -215,12 +215,14 @@ def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
- fail=True, include_stderr=False):
+ fail=True, include_stderr=False, dev_bind=None):
args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
if prog_type is not None:
args += " type " + prog_type
if dev is not None:
args += " dev " + dev
+ elif dev_bind is not None:
+ args += " xdpmeta_dev " + dev_bind
if len(maps):
args += " map " + " map ".join(maps)
@@ -980,6 +982,16 @@ try:
rm("/sys/fs/bpf/offload")
sim.wait_for_flush()
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound",
+ dev_bind=sim['ifname'])
+ devbound = bpf_pinned("/sys/fs/bpf/devbound")
+ start_test("Test dev-bound program in generic mode...")
+ ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True)
+ fail(ret == 0, "devbound program in generic mode allowed")
+ check_extack(err, "Can't attach device-bound programs in generic mode.", args)
+ rm("/sys/fs/bpf/devbound")
+ sim.wait_for_flush()
+
start_test("Test XDP load failure...")
sim.dfs["dev/bpf_bind_verifier_accept"] = 0
ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index 18b9443454a9..bc6b6762baf3 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../../
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 8e3fc05a5397..c76525fe2b84 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -2,7 +2,7 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
index 2f1508abc826..3fd1da2ec07d 100644
--- a/tools/testing/selftests/net/openvswitch/Makefile
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -2,7 +2,7 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := openvswitch.sh
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index e15c43b7359b..ef8b25a606d8 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,13 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
# xfail tests that are known flaky with dbg config, not fixable.
# still run them for coverage (and expect 100% pass without dbg).
declare -ar xfail_list=(
+ "tcp_eor_no-coalesce-retrans.pkt"
"tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_slow_start_slow-start-after-win-update.pkt"
"tcp_timestamping.*.pkt"
"tcp_user_timeout_user-timeout-probe.pkt"
"tcp_zerocopy_epoll_.*.pkt"
- "tcp_tcp_info_tcp-info-*-limited.pkt"
+ "tcp_tcp_info_tcp-info-.*-limited.pkt"
)
readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$"
[[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 7058dc614c25..de25892f865f 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -96,41 +96,57 @@ void vsock_wait_remote_close(int fd)
close(epollfd);
}
-/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
-int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
+int vsock_bind(unsigned int cid, unsigned int port, int type)
{
- struct sockaddr_vm sa_client = {
- .svm_family = AF_VSOCK,
- .svm_cid = VMADDR_CID_ANY,
- .svm_port = bind_port,
- };
- struct sockaddr_vm sa_server = {
+ struct sockaddr_vm sa = {
.svm_family = AF_VSOCK,
.svm_cid = cid,
.svm_port = port,
};
+ int fd;
- int client_fd, ret;
-
- client_fd = socket(AF_VSOCK, type, 0);
- if (client_fd < 0) {
+ fd = socket(AF_VSOCK, type, 0);
+ if (fd < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
- if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) {
+ if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) {
perror("bind");
exit(EXIT_FAILURE);
}
+ return fd;
+}
+
+int vsock_connect_fd(int fd, unsigned int cid, unsigned int port)
+{
+ struct sockaddr_vm sa = {
+ .svm_family = AF_VSOCK,
+ .svm_cid = cid,
+ .svm_port = port,
+ };
+ int ret;
+
timeout_begin(TIMEOUT);
do {
- ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server));
+ ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa));
timeout_check("connect");
} while (ret < 0 && errno == EINTR);
timeout_end();
- if (ret < 0) {
+ return ret;
+}
+
+/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
+int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+{
+ int client_fd;
+
+ client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type);
+
+ if (vsock_connect_fd(client_fd, cid, port)) {
perror("connect");
exit(EXIT_FAILURE);
}
@@ -141,17 +157,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
/* Connect to <cid, port> and return the file descriptor. */
int vsock_connect(unsigned int cid, unsigned int port, int type)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = port,
- .svm_cid = cid,
- },
- };
- int ret;
int fd;
control_expectln("LISTENING");
@@ -162,20 +167,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type)
exit(EXIT_FAILURE);
}
- timeout_begin(TIMEOUT);
- do {
- ret = connect(fd, &addr.sa, sizeof(addr.svm));
- timeout_check("connect");
- } while (ret < 0 && errno == EINTR);
- timeout_end();
-
- if (ret < 0) {
+ if (vsock_connect_fd(fd, cid, port)) {
int old_errno = errno;
close(fd);
fd = -1;
errno = old_errno;
}
+
return fd;
}
@@ -192,28 +191,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
/* Listen on <cid, port> and return the file descriptor. */
static int vsock_listen(unsigned int cid, unsigned int port, int type)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = port,
- .svm_cid = cid,
- },
- };
int fd;
- fd = socket(AF_VSOCK, type, 0);
- if (fd < 0) {
- perror("socket");
- exit(EXIT_FAILURE);
- }
-
- if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
- perror("bind");
- exit(EXIT_FAILURE);
- }
+ fd = vsock_bind(cid, port, type);
if (listen(fd, 1) < 0) {
perror("listen");
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e62f46b2b92a..d1f765ce3eee 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -39,10 +39,12 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
unsigned int parse_port(const char *str);
+int vsock_connect_fd(int fd, unsigned int cid, unsigned int port);
int vsock_connect(unsigned int cid, unsigned int port, int type);
int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind(unsigned int cid, unsigned int port, int type);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 1eebbc0d5f61..dfff8b288265 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -113,24 +113,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts)
static void test_stream_bind_only_server(const struct test_opts *opts)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = opts->peer_port,
- .svm_cid = VMADDR_CID_ANY,
- },
- };
int fd;
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
-
- if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
- perror("bind");
- exit(EXIT_FAILURE);
- }
+ fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM);
/* Notify the client that the server is ready */
control_writeln("BIND");
@@ -1708,6 +1693,101 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts)
close(fd);
}
+#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */
+
+/* Test attempts to trigger a transport release for an unbound socket. This can
+ * lead to a reference count mishandling.
+ */
+static void test_stream_transport_uaf_client(const struct test_opts *opts)
+{
+ int sockets[MAX_PORT_RETRIES];
+ struct sockaddr_vm addr;
+ int fd, i, alen;
+
+ fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM);
+
+ alen = sizeof(addr);
+ if (getsockname(fd, (struct sockaddr *)&addr, &alen)) {
+ perror("getsockname");
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < MAX_PORT_RETRIES; ++i)
+ sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port,
+ SOCK_STREAM);
+
+ close(fd);
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) {
+ perror("Unexpected connect() #1 success");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Vulnerable system may crash now. */
+ if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) {
+ perror("Unexpected connect() #2 success");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+ while (i--)
+ close(sockets[i]);
+
+ control_writeln("DONE");
+}
+
+static void test_stream_transport_uaf_server(const struct test_opts *opts)
+{
+ control_expectln("DONE");
+}
+
+static void test_stream_connect_retry_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) {
+ fprintf(stderr, "Unexpected connect() #1 success\n");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("LISTEN");
+ control_expectln("LISTENING");
+
+ if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) {
+ perror("connect() #2");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_connect_retry_server(const struct test_opts *opts)
+{
+ int fd;
+
+ control_expectln("LISTEN");
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1853,6 +1933,16 @@ static struct test_case test_cases[] = {
.run_client = test_stream_msgzcopy_leak_zcskb_client,
.run_server = test_stream_msgzcopy_leak_zcskb_server,
},
+ {
+ .name = "SOCK_STREAM transport release use-after-free",
+ .run_client = test_stream_transport_uaf_client,
+ .run_server = test_stream_transport_uaf_server,
+ },
+ {
+ .name = "SOCK_STREAM retry failed connect()",
+ .run_client = test_stream_connect_retry_client,
+ .run_server = test_stream_connect_retry_server,
+ },
{},
};