diff options
986 files changed, 10912 insertions, 13547 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 025b7cf3768d..bd4975e132d3 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass Date: January 2018 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 11fc28ecdb6d..cc0ac035b8fe 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1705,7 +1705,6 @@ nopanic merge nomerge - forcesac soft pt [x86, IA-64] nobypass [PPC/POWERNV] @@ -2680,6 +2679,9 @@ allow data leaks with this option, which is equivalent to spectre_v2=off. + nospec_store_bypass_disable + [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -4025,6 +4027,48 @@ Not specifying this option is equivalent to spectre_v2=auto. + spec_store_bypass_disable= + [HW] Control Speculative Store Bypass (SSB) Disable mitigation + (Speculative Store Bypass vulnerability) + + Certain CPUs are vulnerable to an exploit against a + a common industry wide performance optimization known + as "Speculative Store Bypass" in which recent stores + to the same memory location may not be observed by + later loads during speculative execution. The idea + is that such stores are unlikely and that they can + be detected prior to instruction retirement at the + end of a particular speculation execution window. + + In vulnerable processors, the speculatively forwarded + store can be used in a cache side channel attack, for + example to read memory to which the attacker does not + directly have access (e.g. inside sandboxed code). + + This parameter controls whether the Speculative Store + Bypass optimization is used. + + on - Unconditionally disable Speculative Store Bypass + off - Unconditionally enable Speculative Store Bypass + auto - Kernel detects whether the CPU model contains an + implementation of Speculative Store Bypass and + picks the most appropriate mitigation. If the + CPU is not vulnerable, "off" is selected. If the + CPU is vulnerable the default mitigation is + architecture and Kconfig dependent. See below. + prctl - Control Speculative Store Bypass per thread + via prctl. Speculative Store Bypass is enabled + for a process by default. The state of the control + is inherited on fork. + seccomp - Same as "prctl" above, but all seccomp threads + will disable SSB unless they explicitly opt out. + + Not specifying this option is equivalent to + spec_store_bypass_disable=auto. + + Default mitigations: + X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl" + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt index 733927a7b501..07f147381f32 100644 --- a/Documentation/block/null_blk.txt +++ b/Documentation/block/null_blk.txt @@ -71,13 +71,16 @@ use_per_node_hctx=[0/1]: Default: 0 1: The multi-queue block layer is instantiated with a hardware dispatch queue for each CPU node in the system. -use_lightnvm=[0/1]: Default: 0 - Register device with LightNVM. Requires blk-mq and CONFIG_NVM to be enabled. - no_sched=[0/1]: Default: 0 0: nullb* use default blk-mq io scheduler. 1: nullb* doesn't use io scheduler. +blocking=[0/1]: Default: 0 + 0: Register as a non-blocking blk-mq driver device. + 1: Register as a blocking blk-mq driver device, null_blk will set + the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always + needs to block in its ->queue_rq() function. + shared_tags=[0/1]: Default: 0 0: Tag set is not shared. 1: Tag set shared between devices for blk-mq. Only makes sense with diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt index 8acf51a4dfa8..47a6a7fe0b86 100644 --- a/Documentation/devicetree/bindings/net/dsa/b53.txt +++ b/Documentation/devicetree/bindings/net/dsa/b53.txt @@ -10,6 +10,7 @@ Required properties: "brcm,bcm53128" "brcm,bcm5365" "brcm,bcm5395" + "brcm,bcm5389" "brcm,bcm5397" "brcm,bcm5398" diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index 42a248301615..e22d8cfea687 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -57,6 +57,13 @@ KSZ9031: - txd2-skew-ps : Skew control of TX data 2 pad - txd3-skew-ps : Skew control of TX data 3 pad + - micrel,force-master: + Boolean, force phy to master mode. Only set this option if the phy + reference clock provided at CLK125_NDO pin is used as MAC reference + clock because the clock jitter in slave mode is to high (errata#2). + Attention: The link partner must be configurable as slave otherwise + no link will be established. + Examples: mdio { diff --git a/Documentation/features/io/dma-api-debug/arch-support.txt b/Documentation/features/io/dma-api-debug/arch-support.txt deleted file mode 100644 index e438ed675623..000000000000 --- a/Documentation/features/io/dma-api-debug/arch-support.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# Feature name: dma-api-debug -# Kconfig: HAVE_DMA_API_DEBUG -# description: arch supports DMA debug facilities -# - ----------------------- - | arch |status| - ----------------------- - | alpha: | TODO | - | arc: | TODO | - | arm: | ok | - | arm64: | ok | - | c6x: | ok | - | h8300: | TODO | - | hexagon: | TODO | - | ia64: | ok | - | m68k: | TODO | - | microblaze: | ok | - | mips: | ok | - | nios2: | TODO | - | openrisc: | TODO | - | parisc: | TODO | - | powerpc: | ok | - | s390: | ok | - | sh: | ok | - | sparc: | ok | - | um: | TODO | - | unicore32: | TODO | - | x86: | ok | - | xtensa: | ok | - ----------------------- diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 75d2d57e2c44..15853d522941 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -69,31 +69,31 @@ prototypes: locking rules: all may block - i_mutex(inode) -lookup: yes -create: yes -link: yes (both) -mknod: yes -symlink: yes -mkdir: yes -unlink: yes (both) -rmdir: yes (both) (see below) -rename: yes (all) (see below) + i_rwsem(inode) +lookup: shared +create: exclusive +link: exclusive (both) +mknod: exclusive +symlink: exclusive +mkdir: exclusive +unlink: exclusive (both) +rmdir: exclusive (both)(see below) +rename: exclusive (all) (see below) readlink: no get_link: no -setattr: yes +setattr: exclusive permission: no (may not block if called in rcu-walk mode) get_acl: no getattr: no listxattr: no fiemap: no update_time: no -atomic_open: yes +atomic_open: exclusive tmpfile: no - Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on -victim. + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem + exclusive on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. See Documentation/filesystems/directory-locking for more detailed discussion @@ -111,10 +111,10 @@ prototypes: locking rules: all may block - i_mutex(inode) + i_rwsem(inode) list: no get: no -set: yes +set: exclusive --------------------------- super_operations --------------------------- prototypes: @@ -217,14 +217,14 @@ prototypes: locking rules: All except set_page_dirty and freepage may block - PageLocked(page) i_mutex + PageLocked(page) i_rwsem writepage: yes, unlocks (see below) readpage: yes, unlocks writepages: set_page_dirty no readpages: -write_begin: locks the page yes -write_end: yes, unlocks yes +write_begin: locks the page exclusive +write_end: yes, unlocks exclusive bmap: invalidatepage: yes releasepage: yes @@ -439,6 +439,7 @@ prototypes: ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); @@ -480,6 +481,10 @@ mutex or just to use i_size_read() instead. Note: this does not protect the file->f_pos against concurrent modifications since this is something the userspace has to take care about. +->iterate() is called with i_rwsem exclusive. + +->iterate_shared() is called with i_rwsem at least shared. + ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags. Most instances call fasync_helper(), which does that maintenance, so it's not normally something one needs to worry about. Return values > 0 will be diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores index c269aaa2f26a..9e1dfe7553ad 100644 --- a/Documentation/i2c/busses/i2c-ocores +++ b/Documentation/i2c/busses/i2c-ocores @@ -2,7 +2,7 @@ Kernel driver i2c-ocores Supported adapters: * OpenCores.org I2C controller by Richard Herveille (see datasheet link) - Datasheet: http://www.opencores.org/projects.cgi/web/i2c/overview + https://opencores.org/project/i2c/overview Author: Peter Korsgaard <jacmet@sunsite.dk> diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt index 091d20273dcb..61daf4b39600 100644 --- a/Documentation/networking/ppp_generic.txt +++ b/Documentation/networking/ppp_generic.txt @@ -300,12 +300,6 @@ unattached instance are: The ioctl calls available on an instance of /dev/ppp attached to a channel are: -* PPPIOCDETACH detaches the instance from the channel. This ioctl is - deprecated since the same effect can be achieved by closing the - instance. In order to prevent possible races this ioctl will fail - with an EINVAL error if more than one file descriptor refers to this - instance (i.e. as a result of dup(), dup2() or fork()). - * PPPIOCCONNECT connects this channel to a PPP interface. The argument should point to an int containing the interface unit number. It will return an EINVAL error if the channel is already diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt index 11e447bdb3a5..1b7436932a2b 100644 --- a/Documentation/scsi/scsi_eh.txt +++ b/Documentation/scsi/scsi_eh.txt @@ -82,24 +82,13 @@ function 1. invokes optional hostt->eh_timed_out() callback. Return value can be one of - - BLK_EH_HANDLED - This indicates that eh_timed_out() dealt with the timeout. - The command is passed back to the block layer and completed - via __blk_complete_requests(). - - *NOTE* After returning BLK_EH_HANDLED the SCSI layer is - assumed to be finished with the command, and no other - functions from the SCSI layer will be called. So this - should typically only be returned if the eh_timed_out() - handler raced with normal completion. - - BLK_EH_RESET_TIMER This indicates that more time is required to finish the command. Timer is restarted. This action is counted as a retry and only allowed scmd->allowed + 1(!) times. Once the - limit is reached, action for BLK_EH_NOT_HANDLED is taken instead. + limit is reached, action for BLK_EH_DONE is taken instead. - - BLK_EH_NOT_HANDLED + - BLK_EH_DONE eh_timed_out() callback did not handle the command. Step #2 is taken. diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 7b2eb1b7d4ca..a3233da7fa88 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -19,6 +19,7 @@ place where this information is gathered. no_new_privs seccomp_filter unshare + spec_ctrl .. only:: subproject and html diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst new file mode 100644 index 000000000000..32f3d55c54b7 --- /dev/null +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -0,0 +1,94 @@ +=================== +Speculation Control +=================== + +Quite some CPUs have speculation-related misfeatures which are in +fact vulnerabilities causing data leaks in various forms even across +privilege domains. + +The kernel provides mitigation for such vulnerabilities in various +forms. Some of these mitigations are compile-time configurable and some +can be supplied on the kernel command line. + +There is also a class of mitigations which are very expensive, but they can +be restricted to a certain set of processes or tasks in controlled +environments. The mechanism to control these mitigations is via +:manpage:`prctl(2)`. + +There are two prctl options which are related to this: + + * PR_GET_SPECULATION_CTRL + + * PR_SET_SPECULATION_CTRL + +PR_GET_SPECULATION_CTRL +----------------------- + +PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature +which is selected with arg2 of prctl(2). The return value uses bits 0-3 with +the following meaning: + +==== ===================== =================================================== +Bit Define Description +==== ===================== =================================================== +0 PR_SPEC_PRCTL Mitigation can be controlled per task by + PR_SET_SPECULATION_CTRL. +1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is + disabled. +2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is + enabled. +3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A + subsequent prctl(..., PR_SPEC_ENABLE) will fail. +==== ===================== =================================================== + +If all bits are 0 the CPU is not affected by the speculation misfeature. + +If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is +available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation +misfeature will fail. + +PR_SET_SPECULATION_CTRL +----------------------- + +PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which +is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand +in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or +PR_SPEC_FORCE_DISABLE. + +Common error codes +------------------ +======= ================================================================= +Value Meaning +======= ================================================================= +EINVAL The prctl is not implemented by the architecture or unused + prctl(2) arguments are not 0. + +ENODEV arg2 is selecting a not supported speculation misfeature. +======= ================================================================= + +PR_SET_SPECULATION_CTRL error codes +----------------------------------- +======= ================================================================= +Value Meaning +======= ================================================================= +0 Success + +ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor + PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE. + +ENXIO Control of the selected speculation misfeature is not possible. + See PR_GET_SPECULATION_CTRL. + +EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller + tried to enable it again. +======= ================================================================= + +Speculation misfeature controls +------------------------------- +- PR_SPEC_STORE_BYPASS: Speculative Store Bypass + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index b297c48389b9..8d109ef67ab6 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -187,9 +187,9 @@ PCI IOMMU (input/output memory management unit) - Currently four x86-64 PCI-DMA mapping implementations exist: + Multiple x86-64 PCI-DMA mapping implementations exist, for example: - 1. <arch/x86_64/kernel/pci-nommu.c>: use no hardware/software IOMMU at all + 1. <lib/dma-direct.c>: use no hardware/software IOMMU at all (e.g. because you have < 3 GB memory). Kernel boot message: "PCI-DMA: Disabling IOMMU" @@ -208,7 +208,7 @@ IOMMU (input/output memory management unit) Kernel boot message: "PCI-DMA: Using Calgary IOMMU" iommu=[<size>][,noagp][,off][,force][,noforce][,leak[=<nr_of_leak_pages>] - [,memaper[=<order>]][,merge][,forcesac][,fullflush][,nomerge] + [,memaper[=<order>]][,merge][,fullflush][,nomerge] [,noaperture][,calgary] General iommu options: @@ -235,14 +235,7 @@ IOMMU (input/output memory management unit) (experimental). nomerge Don't do scatter-gather (SG) merging. noaperture Ask the IOMMU not to touch the aperture for AGP. - forcesac Force single-address cycle (SAC) mode for masks <40bits - (experimental). noagp Don't initialize the AGP driver and use full aperture. - allowdac Allow double-address cycle (DAC) mode, i.e. DMA >4GB. - DAC is used with 32-bit PCI to push a 64-bit address in - two cycles. When off all DMA over >4GB is forced through - an IOMMU or software bounce buffering. - nodac Forbid DAC mode, i.e. DMA >4GB. panic Always panic when IOMMU overflows. calgary Use the Calgary IOMMU if it is available diff --git a/MAINTAINERS b/MAINTAINERS index 1de11c4c3b56..aa2031ee7d18 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2332,7 +2332,7 @@ F: drivers/gpio/gpio-ath79.c F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt ATHEROS ATH GENERIC UTILITIES -M: "Luis R. Rodriguez" <mcgrof@do-not-panic.com> +M: Kalle Valo <kvalo@codeaurora.org> L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/ath/* @@ -2347,7 +2347,7 @@ S: Maintained F: drivers/net/wireless/ath/ath5k/ ATHEROS ATH6KL WIRELESS DRIVER -M: Kalle Valo <kvalo@qca.qualcomm.com> +M: Kalle Valo <kvalo@codeaurora.org> L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/en/users/Drivers/ath6kl T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -4330,12 +4330,14 @@ W: http://git.infradead.org/users/hch/dma-mapping.git S: Supported F: lib/dma-debug.c F: lib/dma-direct.c +F: lib/dma-noncoherent.c F: lib/dma-virt.c F: drivers/base/dma-mapping.c F: drivers/base/dma-coherent.c F: include/asm-generic/dma-mapping.h F: include/linux/dma-direct.h F: include/linux/dma-mapping.h +F: include/linux/dma-noncoherent.h DME1737 HARDWARE MONITOR DRIVER M: Juerg Haefliger <juergh@gmail.com> @@ -5388,7 +5390,6 @@ S: Maintained F: drivers/iommu/exynos-iommu.c EZchip NPS platform support -M: Elad Kanfi <eladkan@mellanox.com> M: Vineet Gupta <vgupta@synopsys.com> S: Supported F: arch/arc/plat-eznps @@ -6505,9 +6506,15 @@ F: Documentation/networking/hinic.txt F: drivers/net/ethernet/huawei/hinic/ HUGETLB FILESYSTEM -M: Nadia Yvette Chambers <nyc@holomorphy.com> +M: Mike Kravetz <mike.kravetz@oracle.com> +L: linux-mm@kvack.org S: Maintained F: fs/hugetlbfs/ +F: mm/hugetlb.c +F: include/linux/hugetlb.h +F: Documentation/admin-guide/mm/hugetlbpage.rst +F: Documentation/vm/hugetlbfs_reserv.rst +F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages HVA ST MEDIA DRIVER M: Jean-Christophe Trotin <jean-christophe.trotin@st.com> @@ -9022,7 +9029,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlx5/core/en_* MELLANOX ETHERNET INNOVA DRIVER -M: Ilan Tayari <ilant@mellanox.com> R: Boris Pismenny <borisp@mellanox.com> L: netdev@vger.kernel.org S: Supported @@ -9032,7 +9038,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h MELLANOX ETHERNET INNOVA IPSEC DRIVER -M: Ilan Tayari <ilant@mellanox.com> R: Boris Pismenny <borisp@mellanox.com> L: netdev@vger.kernel.org S: Supported @@ -9088,7 +9093,6 @@ F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@mellanox.com> -M: Matan Barak <matanb@mellanox.com> M: Leon Romanovsky <leonro@mellanox.com> L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org @@ -9099,7 +9103,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ MELLANOX MLX5 IB driver -M: Matan Barak <matanb@mellanox.com> M: Leon Romanovsky <leonro@mellanox.com> L: linux-rdma@vger.kernel.org W: http://www.mellanox.com @@ -9700,7 +9703,7 @@ S: Maintained F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) -M: Josef Bacik <jbacik@fb.com> +M: Josef Bacik <josef@toxicpanda.com> S: Maintained L: linux-block@vger.kernel.org L: nbd@other.debian.org @@ -9833,7 +9836,6 @@ F: net/netfilter/xt_CONNSECMARK.c F: net/netfilter/xt_SECMARK.c NETWORKING [TLS] -M: Ilya Lesokhin <ilyal@mellanox.com> M: Aviad Yehezkel <aviadye@mellanox.com> M: Dave Watson <davejwatson@fb.com> L: netdev@vger.kernel.org @@ -11633,7 +11635,7 @@ S: Maintained F: drivers/media/tuners/qt1010* QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo <kvalo@qca.qualcomm.com> +M: Kalle Valo <kvalo@codeaurora.org> L: ath10k@lists.infradead.org W: http://wireless.kernel.org/en/users/Drivers/ath10k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -11684,7 +11686,7 @@ S: Maintained F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER -M: Eugene Krasnikov <k.eugene.e@gmail.com> +M: Kalle Valo <kvalo@codeaurora.org> L: wcn36xx@lists.infradead.org W: http://wireless.kernel.org/en/users/Drivers/wcn36xx T: git git://github.com/KrasnikovEugene/wcn36xx.git @@ -15514,6 +15516,14 @@ L: linux-kernel@vger.kernel.org S: Supported F: drivers/char/xillybus/ +XLP9XX I2C DRIVER +M: George Cherian <george.cherian@cavium.com> +M: Jan Glauber <jglauber@cavium.com> +L: linux-i2c@vger.kernel.org +W: http://www.cavium.com +S: Supported +F: drivers/i2c/busses/i2c-xlp9xx.c + XRA1403 GPIO EXPANDER M: Nandor Han <nandor.han@ge.com> M: Semi Malinen <semi.malinen@ge.com> @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = NAME = Merciless Moray # *DOCUMENTATION* @@ -500,6 +500,9 @@ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) export RETPOLINE_CFLAGS +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS += $(call cc-option,-fno-PIE) + # check for 'asm goto' ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) CC_HAVE_ASM_GOTO := 1 @@ -621,9 +624,9 @@ endif # $(dot-config) # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux -KBUILD_CFLAGS += $(call cc-option,-fno-PIE) -KBUILD_AFLAGS += $(call cc-option,-fno-PIE) -CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) +CFLAGS_GCOV := -fprofile-arcs -ftest-coverage \ + $(call cc-option,-fno-tree-loop-im) \ + $(call cc-disable-warning,maybe-uninitialized,) export CFLAGS_GCOV CFLAGS_KCOV # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default diff --git a/arch/Kconfig b/arch/Kconfig index 75dd23acf133..b624634daea6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -278,9 +278,6 @@ config HAVE_CLK The <linux/clk.h> calls support software clock gating and thus are a key power management tool on many systems. -config HAVE_DMA_API_DEBUG - bool - config HAVE_HW_BREAKPOINT bool depends on PERF_EVENTS diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b2022885ced8..94af0c7f494a 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -10,6 +10,8 @@ config ALPHA select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH select VIRT_TO_BUS select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP @@ -64,15 +66,6 @@ config ZONE_DMA bool default y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA bool default y @@ -211,6 +204,7 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" depends on BROKEN + select DMA_DIRECT_OPS help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one of the first-generation Alpha systems. A number of these systems @@ -345,9 +339,6 @@ config PCI_DOMAINS config PCI_SYSCALL def_bool PCI -config IOMMU_HELPER - def_bool PCI - config ALPHA_NONAME bool depends on ALPHA_BOOK1 || ALPHA_NONAME_CH diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index b78f61f20796..8beeafd4f68e 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -2,11 +2,15 @@ #ifndef _ALPHA_DMA_MAPPING_H #define _ALPHA_DMA_MAPPING_H -extern const struct dma_map_ops *dma_ops; +extern const struct dma_map_ops alpha_pci_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return dma_ops; +#ifdef CONFIG_ALPHA_JENSEN + return &dma_direct_ops; +#else + return &alpha_pci_ops; +#endif } #endif /* _ALPHA_DMA_MAPPING_H */ diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index b9ec55351924..cf6bc1e64d66 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -56,11 +56,6 @@ struct pci_controller { /* IOMMU controls. */ -/* The PCI address space does not equal the physical memory address space. - The networking and block device layers use this boolean for bounce buffer - decisions. */ -#define PCI_DMA_BUS_IS_PHYS 0 - /* TODO: integrate with include/asm-generic/pci.h ? */ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 3e3d49c254c5..c025a3e5e357 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -37,20 +37,20 @@ unsigned int ioread32(void __iomem *addr) void iowrite8(u8 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); } void iowrite16(u16 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); } void iowrite32(u32 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); } EXPORT_SYMBOL(ioread8); @@ -176,26 +176,26 @@ u64 readq(const volatile void __iomem *addr) void writeb(u8 b, volatile void __iomem *addr) { - __raw_writeb(b, addr); mb(); + __raw_writeb(b, addr); } void writew(u16 b, volatile void __iomem *addr) { - __raw_writew(b, addr); mb(); + __raw_writew(b, addr); } void writel(u32 b, volatile void __iomem *addr) { - __raw_writel(b, addr); mb(); + __raw_writel(b, addr); } void writeq(u64 b, volatile void __iomem *addr) { - __raw_writeq(b, addr); mb(); + __raw_writeq(b, addr); } EXPORT_SYMBOL(readb); diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index b6ebb65127a8..c7c5879869d3 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -102,36 +102,3 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, else return -ENODEV; } - -static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - if (!dev || *dev->dma_mask >= 0xffffffffUL) - gfp &= ~GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} - -static int alpha_noop_supported(struct device *dev, u64 mask) -{ - return mask < 0x00ffffffUL ? 0 : 1; -} - -const struct dma_map_ops alpha_noop_ops = { - .alloc = alpha_noop_alloc_coherent, - .free = dma_noop_free_coherent, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, - .mapping_error = dma_noop_mapping_error, - .dma_supported = alpha_noop_supported, -}; - -const struct dma_map_ops *dma_ops = &alpha_noop_ops; -EXPORT_SYMBOL(dma_ops); diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 83b34b9188ea..6923b0d9c1e1 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -950,6 +950,4 @@ const struct dma_map_ops alpha_pci_ops = { .mapping_error = alpha_pci_mapping_error, .dma_supported = alpha_pci_supported, }; - -const struct dma_map_ops *dma_ops = &alpha_pci_ops; -EXPORT_SYMBOL(dma_ops); +EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d76bf4a83740..89d47eac18b2 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,11 +9,15 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SG_CHAIN select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DMA_NONCOHERENT_OPS + select DMA_NONCOHERENT_MMAP select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT @@ -453,16 +457,11 @@ config ARC_HAS_PAE40 default n depends on ISA_ARCV2 select HIGHMEM + select PHYS_ADDR_T_64BIT help Enable access to physical memory beyond 4G, only supported on ARC cores with 40 bit Physical Addressing support -config ARCH_PHYS_ADDR_T_64BIT - def_bool ARC_HAS_PAE40 - -config ARCH_DMA_ADDR_T_64BIT - bool - config ARC_KVADDR_SIZE int "Kernel Virtual Address Space size (MB)" range 0 512 diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 4bd5d4369e05..bbdcb955e18f 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += bugs.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += extable.h generic-y += fb.h diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h deleted file mode 100644 index 7a16824bfe98..000000000000 --- a/arch/arc/include/asm/dma-mapping.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * DMA Mapping glue for ARC - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef ASM_ARC_DMA_MAPPING_H -#define ASM_ARC_DMA_MAPPING_H - -extern const struct dma_map_ops arc_dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &arc_dma_ops; -} - -#endif diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h index ba56c23c1b20..4ff53c041c64 100644 --- a/arch/arc/include/asm/pci.h +++ b/arch/arc/include/asm/pci.h @@ -16,12 +16,6 @@ #define PCIBIOS_MIN_MEM 0x100000 #define pcibios_assign_all_busses() 1 -/* - * The PCI address space does equal the physical memory address space. - * The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS 1 #endif /* __KERNEL__ */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 1dcc404b5aec..8c1071840979 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -16,13 +16,12 @@ * The default DMA address == Phy address which is 0x8000_0000 based. */ -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <asm/cache.h> #include <asm/cacheflush.h> - -static void *arc_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) { unsigned long order = get_order(size); struct page *page; @@ -89,7 +88,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, return kvaddr; } -static void arc_dma_free(struct device *dev, size_t size, void *vaddr, +void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { phys_addr_t paddr = dma_handle; @@ -105,9 +104,9 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr, __free_pages(page, get_order(size)); } -static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) { unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -130,149 +129,14 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } -/* - * streaming DMA Mapping API... - * CPU accesses page via normal paddr, thus needs to explicitly made - * consistent before each use - */ -static void _dma_cache_sync(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - switch (dir) { - case DMA_FROM_DEVICE: - dma_cache_inv(paddr, size); - break; - case DMA_TO_DEVICE: - dma_cache_wback(paddr, size); - break; - case DMA_BIDIRECTIONAL: - dma_cache_wback_inv(paddr, size); - break; - default: - pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr); - } + dma_cache_wback(paddr, size); } -/* - * arc_dma_map_page - map a portion of a page for streaming DMA - * - * Ensure that any data held in the cache is appropriately discarded - * or written back. - * - * The device owns this memory once this call has completed. The CPU - * can regain ownership by calling dma_unmap_page(). - * - * Note: while it takes struct page as arg, caller can "abuse" it to pass - * a region larger than PAGE_SIZE, provided it is physically contiguous - * and this still works correctly - */ -static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - phys_addr_t paddr = page_to_phys(page) + offset; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - _dma_cache_sync(paddr, size, dir); - - return paddr; -} - -/* - * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() - * - * After this call, reads by the CPU to the buffer are guaranteed to see - * whatever the device wrote there. - * - * Note: historically this routine was not implemented for ARC - */ -static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = handle; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - _dma_cache_sync(paddr, size, dir); + dma_cache_inv(paddr, size); } - -static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) - s->dma_address = dma_map_page(dev, sg_page(s), s->offset, - s->length, dir); - - return nents; -} - -static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) - arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, - attrs); -} - -static void arc_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ - _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE); -} - -static void arc_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ - _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE); -} - -static void arc_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction dir) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sglist, sg, nelems, i) - _dma_cache_sync(sg_phys(sg), sg->length, dir); -} - -static void arc_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction dir) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sglist, sg, nelems, i) - _dma_cache_sync(sg_phys(sg), sg->length, dir); -} - -static int arc_dma_supported(struct device *dev, u64 dma_mask) -{ - /* Support 32 bit DMA mask exclusively */ - return dma_mask == DMA_BIT_MASK(32); -} - -const struct dma_map_ops arc_dma_ops = { - .alloc = arc_dma_alloc, - .free = arc_dma_free, - .mmap = arc_dma_mmap, - .map_page = arc_dma_map_page, - .unmap_page = arc_dma_unmap_page, - .map_sg = arc_dma_map_sg, - .unmap_sg = arc_dma_unmap_sg, - .sync_single_for_device = arc_dma_sync_single_for_device, - .sync_single_for_cpu = arc_dma_sync_single_for_cpu, - .sync_sg_for_cpu = arc_dma_sync_sg_for_cpu, - .sync_sg_for_device = arc_dma_sync_sg_for_device, - .dma_supported = arc_dma_supported, -}; -EXPORT_SYMBOL(arc_dma_ops); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a7f8e7f4b88f..c43f5bb55ac8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -60,7 +60,6 @@ config ARM select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE @@ -96,6 +95,7 @@ config ARM select HAVE_VIRT_CPU_ACCOUNTING_GEN select IRQ_FORCED_THREADING select MODULES_USE_ELF_REL + select NEED_DMA_MAP_STATE select NO_BOOTMEM select OF_EARLY_FLATTREE if OF select OF_RESERVED_MEM if OF @@ -119,9 +119,6 @@ config ARM_HAS_SG_CHAIN select ARCH_HAS_SG_CHAIN bool -config NEED_SG_DMA_LENGTH - bool - config ARM_DMA_USE_IOMMU bool select ARM_HAS_SG_CHAIN @@ -224,9 +221,6 @@ config ARCH_MAY_HAVE_PC_FDC config ZONE_DMA bool -config NEED_DMA_MAP_STATE - def_bool y - config ARCH_SUPPORTS_UPROBES def_bool y @@ -1778,12 +1772,6 @@ config SECCOMP and the task is only allowed to execute a few safe syscalls defined by each seccomp mode. -config SWIOTLB - def_bool y - -config IOMMU_HELPER - def_bool SWIOTLB - config PARAVIRT bool "Enable paravirtualization code" help @@ -1815,6 +1803,7 @@ config XEN depends on MMU select ARCH_DMA_ADDR_T_64BIT select ARM_PSCI + select SWIOTLB select SWIOTLB_XEN select PARAVIRT help diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 77e8436beed4..3a1c6b45c9a1 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -76,7 +76,7 @@ allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_HDMI0>, <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, - <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, + <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH1>, <&ccu CLK_HDMI>, <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>; status = "disabled"; @@ -88,7 +88,7 @@ allwinner,pipeline = "de_fe0-de_be0-lcd0"; clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_DE_BE0>, - <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_TCON0_CH0>, + <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH0>, <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>; status = "disabled"; }; @@ -99,7 +99,7 @@ allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0"; clocks = <&ccu CLK_AHB_TVE0>, <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, - <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, + <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH1>, <&ccu CLK_DRAM_TVE0>, <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts index 3328fe583c9b..232f124ce62c 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts @@ -117,6 +117,7 @@ phy-handle = <&int_mii_phy>; phy-mode = "mii"; allwinner,leds-active-low; + status = "okay"; }; &hdmi { diff --git a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts index d1311098ea45..ad173605b1b8 100644 --- a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts +++ b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts @@ -51,7 +51,7 @@ leds { /* The LEDs use PG0~2 pins, which conflict with MMC1 */ - status = "disbaled"; + status = "disabled"; }; }; diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index 1f0de808d111..0abd389cf0ec 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -19,13 +19,6 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif /* CONFIG_PCI_DOMAINS */ -/* - * The PCI address space does equal the physical memory address space. - * The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - #define HAVE_PCI_MMAP #define ARCH_GENERIC_PCI_MMAP_RESOURCE diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index e651c4d0a0d9..6739d37c2bc5 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -276,21 +276,9 @@ static int proc_dma_show(struct seq_file *m, void *v) return 0; } -static int proc_dma_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_dma_show, NULL); -} - -static const struct file_operations proc_dma_operations = { - .open = proc_dma_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_dma_init(void) { - proc_create("dma", 0, NULL, &proc_dma_operations); + proc_create_single("dma", 0, NULL, proc_dma_show); return 0; } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index fc40a2b40595..35ca494c028c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -754,7 +754,7 @@ int __init arm_add_memory(u64 start, u64 size) else size -= aligned_start - start; -#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT +#ifndef CONFIG_PHYS_ADDR_T_64BIT if (aligned_start > ULONG_MAX) { pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n", (long long)start); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 3bda08bee674..6e971e114879 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -91,18 +91,6 @@ static int proc_status_show(struct seq_file *m, void *v) seq_printf(m, "Last process:\t\t%d\n", previous_pid); return 0; } - -static int proc_status_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_status_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_status_fops = { - .open = proc_status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* @@ -260,7 +248,8 @@ static int __init swp_emulation_init(void) return 0; #ifdef CONFIG_PROC_FS - if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops)) + if (!proc_create_single("cpu/swp_emulation", S_IRUGO, NULL, + proc_status_show)) return -ENOMEM; #endif /* CONFIG_PROC_FS */ diff --git a/arch/arm/mach-axxia/Kconfig b/arch/arm/mach-axxia/Kconfig index bb2ce1c63fd9..d3eae6037913 100644 --- a/arch/arm/mach-axxia/Kconfig +++ b/arch/arm/mach-axxia/Kconfig @@ -2,7 +2,6 @@ config ARCH_AXXIA bool "LSI Axxia platforms" depends on ARCH_MULTI_V7 && ARM_LPAE - select ARCH_DMA_ADDR_T_64BIT select ARM_AMBA select ARM_GIC select ARM_TIMER_SP804 diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index c2f3b0d216a4..c46a728df44e 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -211,7 +211,6 @@ config ARCH_BRCMSTB select BRCMSTB_L2_IRQ select BCM7120_L2_IRQ select ARCH_HAS_HOLES_MEMORYMODEL - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ZONE_DMA if ARM_LPAE select SOC_BRCMSTB select SOC_BUS diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index e70feec6fad5..0581ffbedddd 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -323,7 +323,7 @@ void __init ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr) /* All EP93xx devices use the same two GPIO pins for I2C bit-banging */ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { /* Use local offsets on gpiochip/port "G" */ GPIO_LOOKUP_IDX("G", 1, NULL, 0, diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 647c319f9f5f..2ca405816846 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -112,7 +112,6 @@ config SOC_EXYNOS5440 bool "SAMSUNG EXYNOS5440" default y depends on ARCH_EXYNOS5 - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select HAVE_ARM_ARCH_TIMER select AUTO_ZRELADDR select PINCTRL_EXYNOS5440 diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 81110ec34226..5552968f07f8 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -1,7 +1,6 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" depends on ARCH_MULTI_V7 - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA diff --git a/arch/arm/mach-ixp4xx/avila-setup.c b/arch/arm/mach-ixp4xx/avila-setup.c index 77def6169f50..44cbbce6bda6 100644 --- a/arch/arm/mach-ixp4xx/avila-setup.c +++ b/arch/arm/mach-ixp4xx/avila-setup.c @@ -51,7 +51,7 @@ static struct platform_device avila_flash = { }; static struct gpiod_lookup_table avila_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", AVILA_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index 0f5c99941a7d..397190f3a8da 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -70,7 +70,7 @@ static struct platform_device dsmg600_flash = { }; static struct gpiod_lookup_table dsmg600_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", DSMG600_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c index 033f79b35d51..f0a152e365b1 100644 --- a/arch/arm/mach-ixp4xx/fsg-setup.c +++ b/arch/arm/mach-ixp4xx/fsg-setup.c @@ -56,7 +56,7 @@ static struct platform_device fsg_flash = { }; static struct gpiod_lookup_table fsg_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", FSG_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index b168e2fbdbeb..3ec829d52cdd 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -124,7 +124,7 @@ static struct platform_device ixdp425_flash_nand = { #endif /* CONFIG_MTD_NAND_PLATFORM */ static struct gpiod_lookup_table ixdp425_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", IXDP425_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c index 76dfff03cb71..4138d6aa4c52 100644 --- a/arch/arm/mach-ixp4xx/nas100d-setup.c +++ b/arch/arm/mach-ixp4xx/nas100d-setup.c @@ -102,7 +102,7 @@ static struct platform_device nas100d_leds = { }; static struct gpiod_lookup_table nas100d_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NAS100D_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c index 91da63a7d7b5..341b263482ef 100644 --- a/arch/arm/mach-ixp4xx/nslu2-setup.c +++ b/arch/arm/mach-ixp4xx/nslu2-setup.c @@ -70,7 +70,7 @@ static struct platform_device nslu2_flash = { }; static struct gpiod_lookup_table nslu2_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NSLU2_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 5877e547cecd..0adb1bd6208e 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -322,7 +322,7 @@ static struct soc_camera_link palmz72_iclink = { }; static struct gpiod_lookup_table palmz72_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("gpio-pxa", 118, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index 90d0f277de55..207dcc2e94e7 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -460,7 +460,7 @@ static struct platform_device smc91x_device = { /* i2c */ static struct gpiod_lookup_table viper_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.1", .table = { GPIO_LOOKUP_IDX("gpio-pxa", VIPER_RTC_I2C_SDA_GPIO, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), @@ -789,7 +789,7 @@ static int __init viper_tpm_setup(char *str) __setup("tpm=", viper_tpm_setup); struct gpiod_lookup_table viper_tpm_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.2", .table = { GPIO_LOOKUP_IDX("gpio-pxa", VIPER_TPM_I2C_SDA_GPIO, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig index a4065966881a..fafd3d7f9f8c 100644 --- a/arch/arm/mach-rockchip/Kconfig +++ b/arch/arm/mach-rockchip/Kconfig @@ -3,7 +3,6 @@ config ARCH_ROCKCHIP depends on ARCH_MULTI_V7 select PINCTRL select PINCTRL_ROCKCHIP - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_HAS_RESET_CONTROLLER select ARM_AMBA select ARM_GIC diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index bdb5ec1cf560..39aef4876ed4 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -657,25 +657,13 @@ static int ecard_devices_proc_show(struct seq_file *m, void *v) return 0; } -static int ecard_devices_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ecard_devices_proc_show, NULL); -} - -static const struct file_operations bus_ecard_proc_fops = { - .owner = THIS_MODULE, - .open = ecard_devices_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static struct proc_dir_entry *proc_bus_ecard_dir = NULL; static void ecard_proc_init(void) { proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL); - proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops); + proc_create_single("devices", 0, proc_bus_ecard_dir, + ecard_devices_proc_show); } #define ec_set_resource(ec,nr,st,sz) \ diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index ace010479eb6..f45aed2519ba 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -327,7 +327,7 @@ static struct platform_device simpad_gpio_leds = { * i2c */ static struct gpiod_lookup_table simpad_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("gpio", 21, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 280e7312a9e1..fe60cd09a5ca 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -29,7 +29,6 @@ config ARCH_RMOBILE menuconfig ARCH_RENESAS bool "Renesas ARM SoCs" depends on ARCH_MULTI_V7 && MMU - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_SHMOBILE select ARM_GIC select GPIOLIB diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 1e0aeb47bac6..7f3b83e0d324 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -15,6 +15,5 @@ menuconfig ARCH_TEGRA select RESET_CONTROLLER select SOC_BUS select ZONE_DMA if ARM_LPAE - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE help This enables support for NVIDIA Tegra based systems. diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 7f14acf67caf..5a016bc80e26 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -661,6 +661,7 @@ config ARM_LPAE bool "Support for the Large Physical Address Extension" depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ !CPU_32v4 && !CPU_32v3 + select PHYS_ADDR_T_64BIT help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the @@ -673,12 +674,6 @@ config ARM_PV_FIXUP def_bool y depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE -config ARCH_PHYS_ADDR_T_64BIT - def_bool ARM_LPAE - -config ARCH_DMA_ADDR_T_64BIT - bool - config ARM_THUMB bool "Support Thumb user binaries" if !CPU_THUMBONLY && EXPERT depends on CPU_THUMB_CAPABLE diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 619f24a42d09..f448a0663b10 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -241,12 +241,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_teardown_dma_ops(struct device *dev) { } - -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -core_initcall(dma_debug_do_init); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8c398fedbbb6..4b6613b5e042 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -466,12 +466,6 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) void __init dma_contiguous_remap(void) { int i; - - if (!dma_mmu_remap_num) - return; - - /* call flush_cache_all() since CMA area would be large enough */ - flush_cache_all(); for (i = 0; i < dma_mmu_remap_num; i++) { phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t end = start + dma_mmu_remap[i].size; @@ -504,15 +498,7 @@ void __init dma_contiguous_remap(void) flush_tlb_kernel_range(__phys_to_virt(start), __phys_to_virt(end)); - /* - * All the memory in CMA region will be on ZONE_MOVABLE. - * If that zone is considered as highmem, the memory in CMA - * region is also considered as highmem even if it's - * physical address belong to lowmem. In this case, - * re-mapping isn't required. - */ - if (!is_highmem_idx(ZONE_MOVABLE)) - iotable_init(&map, 1); + iotable_init(&map, 1); } } @@ -1165,15 +1151,6 @@ int arm_dma_supported(struct device *dev, u64 mask) return __dma_supported(dev, mask, false); } -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -core_initcall(dma_debug_do_init); - #ifdef CONFIG_ARM_DMA_USE_IOMMU static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eb2cf4938f6d..b25ed7834f6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,7 +105,6 @@ config ARM64 select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -133,6 +132,8 @@ config ARM64 select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA select MULTI_IRQ_HANDLER + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH select NO_BOOTMEM select OF select OF_EARLY_FLATTREE @@ -142,6 +143,7 @@ config ARM64 select POWER_SUPPLY select REFCOUNT_FULL select SPARSE_IRQ + select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK help @@ -150,9 +152,6 @@ config ARM64 config 64BIT def_bool y -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - config MMU def_bool y @@ -237,24 +236,9 @@ config ZONE_DMA32 config HAVE_GENERIC_GUP def_bool y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config SMP def_bool y -config SWIOTLB - def_bool y - -config IOMMU_HELPER - def_bool SWIOTLB - config KERNEL_MODE_NEON def_bool y diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 724a0d3b7683..edb4ee0b8896 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -299,7 +299,6 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ dwmmc_0: dwmmc0@f723d000 { - max-frequency = <150000000>; cap-mmc-highspeed; mmc-hs200-1_8v; non-removable; diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 9ef0797380cb..f9b0b09153e0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v) /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v) /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS , ##cl); \ \ @@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v) /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+r" (x0), [v] "+Q" (v->counter) + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); @@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]") \ - : [old1] "+r" (x0), [old2] "+r" (x1), \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 8747f7c5e0e7..9e690686e8aa 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -18,11 +18,6 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) -/* - * PCI address space differs from physical memory address space - */ -#define PCI_DMA_BUS_IS_PHYS (0) - #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..d894a20b70b2 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount); /* arm-smccc */ EXPORT_SYMBOL(__arm_smccc_smc); EXPORT_SYMBOL(__arm_smccc_hvc); + + /* tishift.S */ +extern long long __ashlti3(long long a, int b); +EXPORT_SYMBOL(__ashlti3); +extern long long __ashrti3(long long a, int b); +EXPORT_SYMBOL(__ashrti3); +extern long long __lshrti3(long long a, int b); +EXPORT_SYMBOL(__lshrti3); diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index d3db9b2cd479..0fdff97794de 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -1,17 +1,6 @@ -/* - * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/linkage.h> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a96ec0181818..db01f2709842 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -508,16 +508,6 @@ static int __init arm64_dma_init(void) } arch_initcall(arm64_dma_init); -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_debug_do_init); - - #ifdef CONFIG_IOMMU_DMA #include <linux/dma-iommu.h> #include <linux/platform_device.h> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4165485e8b6e..2af3dd89bcdb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, static void __do_user_fault(struct siginfo *info, unsigned int esr) { current->thread.fault_address = (unsigned long)info->si_addr; + + /* + * If the faulting address is in the kernel, we must sanitize the ESR. + * From userspace's point of view, kernel-only mappings don't exist + * at all, so we report them as level 0 translation faults. + * (This is not quite the way that "no mapping there at all" behaves: + * an alignment fault not caused by the memory type would take + * precedence over translation fault for a real access to empty + * space. Unfortunately we can't easily distinguish "alignment fault + * not caused by memory type" from "alignment fault caused by memory + * type", so we ignore this wrinkle and just return the translation + * fault.) + */ + if (current->thread.fault_address >= TASK_SIZE) { + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_LOW: + /* + * These bits provide only information about the + * faulting instruction, which userspace knows already. + * We explicitly clear bits which are architecturally + * RES0 in case they are given meanings in future. + * We always report the ESR as if the fault was taken + * to EL1 and so ISV and the bits in ISS[23:14] are + * clear. (In fact it always will be a fault to EL1.) + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL | + ESR_ELx_CM | ESR_ELx_WNR; + esr |= ESR_ELx_FSC_FAULT; + break; + case ESR_ELx_EC_IABT_LOW: + /* + * Claim a level 0 translation fault. + * All other bits are architecturally RES0 for faults + * reported with that DFSC value, so we clear them. + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL; + esr |= ESR_ELx_FSC_FAULT; + break; + default: + /* + * This should never happen (entry.S only brings us + * into this code for insn and data aborts from a lower + * exception level). Fail safe by not providing an ESR + * context record at all. + */ + WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); + esr = 0; + break; + } + } + current->thread.fault_code = esr; arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2dbb2c9f1ec1..493ff75670ff 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pud_present(READ_ONCE(*pudp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) return 0; BUG_ON(phys & ~PUD_MASK); - set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, new_pud); return 1; } @@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pmd_present(READ_ONCE(*pmdp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) return 0; BUG_ON(phys & ~PMD_MASK); - set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index c6b4dd1418b4..bf59855628ac 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -6,11 +6,13 @@ config C6X def_bool y + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP + select DMA_NONCOHERENT_OPS select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK - select HAVE_DMA_API_DEBUG select HAVE_MEMBLOCK select SPARSE_IRQ select IRQ_DOMAIN diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index fd4c840de837..434600e47662 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += current.h generic-y += device.h generic-y += div64.h generic-y += dma.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h deleted file mode 100644 index 05daf1038111..000000000000 --- a/arch/c6x/include/asm/dma-mapping.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot <aurelien.jacquiot@ti.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#ifndef _ASM_C6X_DMA_MAPPING_H -#define _ASM_C6X_DMA_MAPPING_H - -extern const struct dma_map_ops c6x_dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &c6x_dma_ops; -} - -extern void coherent_mem_init(u32 start, u32 size); -void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, unsigned long attrs); -void c6x_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs); - -#endif /* _ASM_C6X_DMA_MAPPING_H */ diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h index 852afb209afb..350f34debb19 100644 --- a/arch/c6x/include/asm/setup.h +++ b/arch/c6x/include/asm/setup.h @@ -28,5 +28,7 @@ extern unsigned char c6x_fuse_mac[6]; extern void machine_init(unsigned long dt_ptr); extern void time_init(void); +extern void coherent_mem_init(u32 start, u32 size); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_C6X_SETUP_H */ diff --git a/arch/c6x/kernel/Makefile b/arch/c6x/kernel/Makefile index 02f340d7b8fe..fbe74174de87 100644 --- a/arch/c6x/kernel/Makefile +++ b/arch/c6x/kernel/Makefile @@ -8,6 +8,6 @@ extra-y := head.o vmlinux.lds obj-y := process.o traps.o irq.o signal.o ptrace.o obj-y += setup.o sys_c6x.o time.o devicetree.o obj-y += switch_to.o entry.o vectors.o c6x_ksyms.o -obj-y += soc.o dma.o +obj-y += soc.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c deleted file mode 100644 index 9fff8be75f58..000000000000 --- a/arch/c6x/kernel/dma.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2011 Texas Instruments Incorporated - * Author: Mark Salter <msalter@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/mm_types.h> -#include <linux/scatterlist.h> - -#include <asm/cacheflush.h> - -static void c6x_dma_sync(dma_addr_t handle, size_t size, - enum dma_data_direction dir) -{ - unsigned long paddr = handle; - - BUG_ON(!valid_dma_direction(dir)); - - switch (dir) { - case DMA_FROM_DEVICE: - L2_cache_block_invalidate(paddr, paddr + size); - break; - case DMA_TO_DEVICE: - L2_cache_block_writeback(paddr, paddr + size); - break; - case DMA_BIDIRECTIONAL: - L2_cache_block_writeback_invalidate(paddr, paddr + size); - break; - default: - break; - } -} - -static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t handle = virt_to_phys(page_address(page) + offset); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - c6x_dma_sync(handle, size, dir); - - return handle; -} - -static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - c6x_dma_sync(handle, size, dir); -} - -static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sglist, sg, nents, i) { - sg->dma_address = sg_phys(sg); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - c6x_dma_sync(sg->dma_address, sg->length, dir); - } - - return nents; -} - -static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - return; - - for_each_sg(sglist, sg, nents, i) - c6x_dma_sync(sg_dma_address(sg), sg->length, dir); -} - -static void c6x_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - c6x_dma_sync(handle, size, dir); - -} - -static void c6x_dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - c6x_dma_sync(handle, size, dir); - -} - -static void c6x_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sglist, int nents, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sglist, sg, nents, i) - c6x_dma_sync_single_for_cpu(dev, sg_dma_address(sg), - sg->length, dir); - -} - -static void c6x_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sglist, int nents, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sglist, sg, nents, i) - c6x_dma_sync_single_for_device(dev, sg_dma_address(sg), - sg->length, dir); - -} - -const struct dma_map_ops c6x_dma_ops = { - .alloc = c6x_dma_alloc, - .free = c6x_dma_free, - .map_page = c6x_dma_map_page, - .unmap_page = c6x_dma_unmap_page, - .map_sg = c6x_dma_map_sg, - .unmap_sg = c6x_dma_unmap_sg, - .sync_single_for_device = c6x_dma_sync_single_for_device, - .sync_single_for_cpu = c6x_dma_sync_single_for_cpu, - .sync_sg_for_device = c6x_dma_sync_sg_for_device, - .sync_sg_for_cpu = c6x_dma_sync_sg_for_cpu, -}; -EXPORT_SYMBOL(c6x_dma_ops); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index 95e38ad27c69..d0a8e0c4b27e 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -19,10 +19,12 @@ #include <linux/bitops.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/memblock.h> +#include <asm/cacheflush.h> #include <asm/page.h> +#include <asm/setup.h> /* * DMA coherent memory management, can be redefined using the memdma= @@ -73,7 +75,7 @@ static void __free_dma_pages(u32 addr, int order) * Allocate DMA coherent memory space and return both the kernel * virtual and DMA address for that space. */ -void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { u32 paddr; @@ -98,7 +100,7 @@ void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, /* * Free DMA coherent memory as defined by the above mapping. */ -void c6x_dma_free(struct device *dev, size_t size, void *vaddr, +void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { int order; @@ -139,3 +141,35 @@ void __init coherent_mem_init(phys_addr_t start, u32 size) dma_bitmap = phys_to_virt(bitmap_phys); memset(dma_bitmap, 0, dma_pages * PAGE_SIZE); } + +static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + BUG_ON(!valid_dma_direction(dir)); + + switch (dir) { + case DMA_FROM_DEVICE: + L2_cache_block_invalidate(paddr, paddr + size); + break; + case DMA_TO_DEVICE: + L2_cache_block_writeback(paddr, paddr + size); + break; + case DMA_BIDIRECTIONAL: + L2_cache_block_writeback_invalidate(paddr, paddr + size); + break; + default: + break; + } +} + +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + return c6x_dma_sync(dev, paddr, size, dir); +} + +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + return c6x_dma_sync(dev, paddr, size, dir); +} diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h index 7c9e55d62215..d4d345a52092 100644 --- a/arch/h8300/include/asm/pci.h +++ b/arch/h8300/include/asm/pci.h @@ -15,6 +15,4 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) /* We don't do dynamic PCI IRQ allocation */ } -#define PCI_DMA_BUS_IS_PHYS (1) - #endif /* _ASM_H8300_PCI_H */ diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 76d2f20d525e..37adb2003033 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -19,6 +19,7 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD @@ -63,9 +64,6 @@ config GENERIC_CSUM config GENERIC_IRQ_PROBE def_bool y -config NEED_SG_DMA_LENGTH - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool n diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index ad8347c29dcf..77459df34e2e 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -208,7 +208,6 @@ const struct dma_map_ops hexagon_dma_ops = { .sync_single_for_cpu = hexagon_sync_single_for_cpu, .sync_single_for_device = hexagon_sync_single_for_device, .mapping_error = hexagon_mapping_error, - .is_phys = 1, }; void __init hexagon_dma_init(void) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index bbe12a038d21..2067289fad4a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -29,7 +29,6 @@ config IA64 select HAVE_FUNCTION_TRACER select TTY select HAVE_ARCH_TRACEHOOK - select HAVE_DMA_API_DEBUG select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING @@ -54,6 +53,8 @@ config IA64 select MODULES_USE_ELF_RELA select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_AUDITSYSCALL + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -78,18 +79,6 @@ config MMU bool default y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - -config SWIOTLB - bool - config STACKTRACE_SUPPORT def_bool y @@ -146,7 +135,6 @@ config IA64_GENERIC bool "generic" select NUMA select ACPI_NUMA - select DMA_DIRECT_OPS select SWIOTLB select PCI_MSI help @@ -167,7 +155,6 @@ config IA64_GENERIC config IA64_DIG bool "DIG-compliant" - select DMA_DIRECT_OPS select SWIOTLB config IA64_DIG_VTD @@ -183,7 +170,6 @@ config IA64_HP_ZX1 config IA64_HP_ZX1_SWIOTLB bool "HP-zx1/sx1000 with software I/O TLB" - select DMA_DIRECT_OPS select SWIOTLB help Build a kernel that runs on HP zx1 and sx1000 systems even when they @@ -207,7 +193,6 @@ config IA64_SGI_UV bool "SGI-UV" select NUMA select ACPI_NUMA - select DMA_DIRECT_OPS select SWIOTLB help Selecting this option will optimize the kernel for use on UV based @@ -218,7 +203,6 @@ config IA64_SGI_UV config IA64_HP_SIM bool "Ski-simulator" - select DMA_DIRECT_OPS select SWIOTLB depends on !PM @@ -613,6 +597,3 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" - -config IOMMU_HELPER - def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index aec4a3354abe..ee5b652d320a 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1845,9 +1845,6 @@ static void ioc_init(unsigned long hpa, struct ioc *ioc) ioc_resource_init(ioc); ioc_sac_init(ioc); - if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask) - ia64_max_iommu_merge_mask = ~iovp_mask; - printk(KERN_INFO PFX "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, @@ -1942,19 +1939,6 @@ static const struct seq_operations ioc_seq_ops = { .show = ioc_show }; -static int -ioc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ioc_seq_ops); -} - -static const struct file_operations ioc_fops = { - .open = ioc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static void __init ioc_proc_init(void) { @@ -1964,7 +1948,7 @@ ioc_proc_init(void) if (!dir) return; - proc_create(ioc_list->name, 0, dir, &ioc_fops); + proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops); } #endif diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index a419ccf33cde..663388a73d4e 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -435,19 +435,6 @@ static int rs_proc_show(struct seq_file *m, void *v) return 0; } -static int rs_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rs_proc_show, NULL); -} - -static const struct file_operations rs_proc_fops = { - .owner = THIS_MODULE, - .open = rs_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct tty_operations hp_ops = { .open = rs_open, .close = rs_close, @@ -462,7 +449,7 @@ static const struct tty_operations hp_ops = { .unthrottle = rs_unthrottle, .send_xchar = rs_send_xchar, .hangup = rs_hangup, - .proc_fops = &rs_proc_fops, + .proc_show = rs_proc_show, }; static const struct tty_port_operations hp_port_ops = { diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index b1d04e8bafc8..780e8744ba85 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -30,23 +30,6 @@ struct pci_vector_struct { #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 -/* - * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct - * correspondence between device bus addresses and CPU physical addresses. - * Platforms with a hardware I/O MMU _must_ turn this off to suppress the - * bounce buffer handling code in the block and network device layers. - * Platforms with separate bus address spaces _must_ turn this off and provide - * a device DMA mapping implementation that takes care of the necessary - * address translation. - * - * For now, the ia64 platforms which may have separate/multiple bus address - * spaces all have I/O MMUs which support the merging of physically - * discontiguous buffers, so we can use that as the sole factor to determine - * the setting of PCI_DMA_BUS_IS_PHYS. - */ -extern unsigned long ia64_max_iommu_merge_mask; -#define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL) - #define HAVE_PCI_MMAP #define ARCH_GENERIC_PCI_MMAP_RESOURCE #define arch_can_pci_mmap_wc() 1 diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index f2d57e66fd86..7a471d8d67d4 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -9,16 +9,6 @@ int iommu_detected __read_mostly; const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); - const struct dma_map_ops *dma_get_ops(struct device *dev) { return dma_ops; diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index b6e597860888..f4a94241265c 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -920,18 +920,6 @@ static int proc_palinfo_show(struct seq_file *m, void *v) return 0; } -static int proc_palinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_palinfo_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_palinfo_fops = { - .open = proc_palinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int palinfo_add_proc(unsigned int cpu) { pal_func_cpu_u_t f; @@ -948,8 +936,8 @@ static int palinfo_add_proc(unsigned int cpu) for (j=0; j < NR_PALINFO_ENTRIES; j++) { f.func_id = j; - proc_create_data(palinfo_entries[j].name, 0, cpu_dir, - &proc_palinfo_fops, (void *)f.value); + proc_create_single_data(palinfo_entries[j].name, 0, cpu_dir, + proc_palinfo_show, (void *)f.value); } return 0; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 8fb280e33114..3b38c717008a 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5708,13 +5708,6 @@ const struct seq_operations pfm_seq_ops = { .show = pfm_proc_show }; -static int -pfm_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pfm_seq_ops); -} - - /* * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens * during pfm_enable() hence before pfm_start(). We cannot assume monitoring @@ -6537,13 +6530,6 @@ found: return 0; } -static const struct file_operations pfm_proc_fops = { - .open = pfm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - int __init pfm_init(void) { @@ -6615,7 +6601,7 @@ pfm_init(void) /* * create /proc/perfmon (mostly for debugging purposes) */ - perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops); + perfmon_dir = proc_create_seq("perfmon", S_IRUGO, NULL, &pfm_seq_ops); if (perfmon_dir == NULL) { printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); pmu_conf = NULL; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 52c404b08904..aba1f463a8dd 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -54,8 +54,6 @@ MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>"); MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); MODULE_LICENSE("GPL"); -static const struct file_operations proc_salinfo_fops; - typedef struct { const char *name; /* name of the proc entry */ unsigned long feature; /* feature bit */ @@ -578,6 +576,17 @@ static int salinfo_cpu_pre_down(unsigned int cpu) return 0; } +/* + * 'data' contains an integer that corresponds to the feature we're + * testing + */ +static int proc_salinfo_show(struct seq_file *m, void *v) +{ + unsigned long data = (unsigned long)v; + seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); + return 0; +} + static int __init salinfo_init(void) { @@ -593,9 +602,9 @@ salinfo_init(void) for (i=0; i < NR_SALINFO_ENTRIES; i++) { /* pass the feature bit in question as misc data */ - *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir, - &proc_salinfo_fops, - (void *)salinfo_entries[i].feature); + *sdir++ = proc_create_single_data(salinfo_entries[i].name, 0, + salinfo_dir, proc_salinfo_show, + (void *)salinfo_entries[i].feature); } for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { @@ -633,27 +642,4 @@ salinfo_init(void) return 0; } -/* - * 'data' contains an integer that corresponds to the feature we're - * testing - */ -static int proc_salinfo_show(struct seq_file *m, void *v) -{ - unsigned long data = (unsigned long)v; - seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); - return 0; -} - -static int proc_salinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_salinfo_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_salinfo_fops = { - .open = proc_salinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - module_init(salinfo_init); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dee56bcb993d..ad43cbf70628 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -124,18 +124,6 @@ unsigned long ia64_i_cache_stride_shift = ~0; unsigned long ia64_cache_stride_shift = ~0; /* - * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This - * mask specifies a mask of address bits that must be 0 in order for two buffers to be - * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start - * address of the second buffer must be aligned to (merge_mask+1) in order to be - * mergeable). By default, we assume there is no I/O MMU which can merge physically - * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu - * page-size of 2^64. - */ -unsigned long ia64_max_iommu_merge_mask = ~0UL; -EXPORT_SYMBOL(ia64_max_iommu_merge_mask); - -/* * We use a special marker for the end of memory and it uses the extra (+1) slot */ struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 11f2275570fb..8479e9a7ce16 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -480,11 +480,6 @@ sn_io_early_init(void) tioca_init_provider(); tioce_init_provider(); - /* - * This is needed to avoid bounce limit checks in the blk layer - */ - ia64_max_iommu_merge_mask = ~PAGE_MASK; - sn_irq_lh_init(); INIT_LIST_HEAD(&sn_sysdata_list); sn_init_cpei_timer(); diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c index ec4de2b09653..e15457bf21ac 100644 --- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -140,18 +140,6 @@ static int proc_fit_show(struct seq_file *m, void *v) return 0; } -static int proc_fit_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_fit_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_fit_fops = { - .open = proc_fit_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int proc_version_show(struct seq_file *m, void *v) { unsigned long nasid = (unsigned long)m->private; @@ -174,18 +162,6 @@ static int proc_version_show(struct seq_file *m, void *v) return 0; } -static int proc_version_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_version_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_version_fops = { - .open = proc_version_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* module entry points */ int __init prominfo_init(void); void __exit prominfo_exit(void); @@ -217,10 +193,10 @@ int __init prominfo_init(void) if (!dir) continue; nasid = cnodeid_to_nasid(cnodeid); - proc_create_data("fit", 0, dir, - &proc_fit_fops, (void *)nasid); - proc_create_data("version", 0, dir, - &proc_version_fops, (void *)nasid); + proc_create_single_data("fit", 0, dir, proc_fit_show, + (void *)nasid); + proc_create_single_data("version", 0, dir, proc_version_show, + (void *)nasid); } return 0; } diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 29cf8f8c08e9..c2a4d84297b0 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -18,33 +18,18 @@ static int partition_id_show(struct seq_file *s, void *p) return 0; } -static int partition_id_open(struct inode *inode, struct file *file) -{ - return single_open(file, partition_id_show, NULL); -} - static int system_serial_number_show(struct seq_file *s, void *p) { seq_printf(s, "%s\n", sn_system_serial_number()); return 0; } -static int system_serial_number_open(struct inode *inode, struct file *file) -{ - return single_open(file, system_serial_number_show, NULL); -} - static int licenseID_show(struct seq_file *s, void *p) { seq_printf(s, "0x%llx\n", sn_partition_serial_number_val()); return 0; } -static int licenseID_open(struct inode *inode, struct file *file) -{ - return single_open(file, licenseID_show, NULL); -} - static int coherence_id_show(struct seq_file *s, void *p) { seq_printf(s, "%d\n", partition_coherence_id()); @@ -52,43 +37,10 @@ static int coherence_id_show(struct seq_file *s, void *p) return 0; } -static int coherence_id_open(struct inode *inode, struct file *file) -{ - return single_open(file, coherence_id_show, NULL); -} - /* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */ extern int sn_topology_open(struct inode *, struct file *); extern int sn_topology_release(struct inode *, struct file *); -static const struct file_operations proc_partition_id_fops = { - .open = partition_id_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_system_sn_fops = { - .open = system_serial_number_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_license_id_fops = { - .open = licenseID_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_coherence_id_fops = { - .open = coherence_id_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct file_operations proc_sn_topo_fops = { .open = sn_topology_open, .read = seq_read, @@ -104,13 +56,13 @@ void register_sn_procfs(void) if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) return; - proc_create("partition_id", 0444, sgi_proc_dir, - &proc_partition_id_fops); - proc_create("system_serial_number", 0444, sgi_proc_dir, - &proc_system_sn_fops); - proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops); - proc_create("coherence_id", 0444, sgi_proc_dir, - &proc_coherence_id_fops); + proc_create_single("partition_id", 0444, sgi_proc_dir, + partition_id_show); + proc_create_single("system_serial_number", 0444, sgi_proc_dir, + system_serial_number_show); + proc_create_single("licenseID", 0444, sgi_proc_dir, licenseID_show); + proc_create_single("coherence_id", 0444, sgi_proc_dir, + coherence_id_show); proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops); } diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h index ef26fae8cf0b..5a4bc223743b 100644 --- a/arch/m68k/include/asm/pci.h +++ b/arch/m68k/include/asm/pci.h @@ -4,12 +4,6 @@ #include <asm-generic/pci.h> -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - #define pcibios_assign_all_busses() 1 #define PCIBIOS_MIN_IO 0x00000100 diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index dd25bfc22fb4..f35e3ebd6331 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -527,21 +527,9 @@ static int hardware_proc_show(struct seq_file *m, void *v) return 0; } -static int hardware_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, hardware_proc_show, NULL); -} - -static const struct file_operations hardware_proc_fops = { - .open = hardware_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_hardware_init(void) { - proc_create("hardware", 0, NULL, &hardware_proc_fops); + proc_create_single("hardware", 0, NULL, hardware_proc_show); return 0; } module_init(proc_hardware_init); diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3817a3e2146c..d14782100088 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -19,7 +19,6 @@ config MICROBLAZE select HAVE_ARCH_HASH select HAVE_ARCH_KGDB select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 5de871eb4a59..00337861472e 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -62,12 +62,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define HAVE_PCI_LEGACY 1 -/* The PCI address space does equal the physical memory - * address space (no IOMMU). The IDE and SCSI device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - extern void pcibios_claim_one_bus(struct pci_bus *b); extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index c91e8cef98dd..3145e7dc8ab1 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -184,14 +184,3 @@ const struct dma_map_ops dma_nommu_ops = { .sync_sg_for_device = dma_nommu_sync_sg_for_device, }; EXPORT_SYMBOL(dma_nommu_ops); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 225c95da23ce..2dcdc13cd65d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -42,7 +42,6 @@ config MIPS select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EXIT_THREAD @@ -132,7 +131,7 @@ config MIPS_GENERIC config MIPS_ALCHEMY bool "Alchemy processor based machines" - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select CEVT_R4K select CSRC_R4K select IRQ_MIPS_CPU @@ -890,7 +889,7 @@ config CAVIUM_OCTEON_SOC bool "Cavium Networks Octeon SoC based boards" select CEVT_R4K select ARCH_HAS_PHYS_TO_DMA - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select DMA_COHERENT select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -912,6 +911,7 @@ config CAVIUM_OCTEON_SOC select MIPS_NR_CPU_NR_MAP_1024 select BUILTIN_DTB select MTD_COMPLEX_MAPPINGS + select SWIOTLB select SYS_SUPPORTS_RELOCATABLE help This option supports all of the Octeon reference boards from Cavium @@ -936,7 +936,7 @@ config NLM_XLR_BOARD select SWAP_IO_SPACE select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM select DMA_COHERENT @@ -962,7 +962,7 @@ config NLM_XLP_BOARD select HW_HAS_PCI select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select GPIOLIB select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN @@ -1101,9 +1101,6 @@ config GPIO_TXX9 config FW_CFE bool -config ARCH_DMA_ADDR_T_64BIT - def_bool (HIGHMEM && ARCH_PHYS_ADDR_T_64BIT) || 64BIT - config ARCH_SUPPORTS_UPROBES bool @@ -1122,9 +1119,6 @@ config DMA_NONCOHERENT bool select NEED_DMA_MAP_STATE -config NEED_DMA_MAP_STATE - bool - config SYS_HAS_EARLY_PRINTK bool @@ -1373,6 +1367,7 @@ config CPU_LOONGSON3 select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 select GPIOLIB + select SWIOTLB help The Loongson 3 processor implements the MIPS64R2 instruction set with many extensions. @@ -1770,7 +1765,7 @@ config CPU_MIPS32_R5_XPA depends on SYS_SUPPORTS_HIGHMEM select XPA select HIGHMEM - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT default n help Choose this option if you want to enable the Extended Physical @@ -2402,9 +2397,6 @@ config SB1_PASS_2_1_WORKAROUNDS default y -config ARCH_PHYS_ADDR_T_64BIT - bool - choice prompt "SmartMIPS or microMIPS ASE support" diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index b3043c08f769..aee8d7b8f091 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -18,9 +18,9 @@ #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) #endif -#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780) -#include <asm/mach-jz4740/base.h> -#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset)) +#ifdef CONFIG_MACH_INGENIC +#define INGENIC_UART0_BASE_ADDR 0x10030000 +#define PORT(offset) (CKSEG1ADDR(INGENIC_UART0_BASE_ADDR) + (4 * offset)) #endif #ifdef CONFIG_CPU_XLR diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile index 9987e0e378c5..69ca00590b8d 100644 --- a/arch/mips/boot/dts/xilfpga/Makefile +++ b/arch/mips/boot/dts/xilfpga/Makefile @@ -1,4 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += nexys4ddr.dtb - -obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index b5eee1a57d6c..4984e462be30 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -67,18 +67,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY help Lock the kernel's implementation of memcpy() into L2. -config IOMMU_HELPER - bool - -config NEED_SG_DMA_LENGTH - bool - -config SWIOTLB - def_bool y - select DMA_DIRECT_OPS - select IOMMU_HELPER - select NEED_SG_DMA_LENGTH - config OCTEON_ILM tristate "Module to measure interrupt latency using Octeon CIU Timer" help diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index b51432dd10b6..0dd0d5d460a5 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -16,3 +16,4 @@ all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb its-y := vmlinux.its.S its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S +its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 2339f42f047a..436099883022 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -121,13 +121,6 @@ extern unsigned long PCIBIOS_MIN_MEM; #include <linux/string.h> #include <asm/io.h> -/* - * The PCI address space does equal the physical memory address space. - * The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - #ifdef CONFIG_PCI_DOMAINS_GENERIC static inline int pci_proc_domain(struct pci_bus *bus) { diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index b9e9bf628849..3775a8d694fb 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -721,6 +721,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) if (value & ~known_bits) return -EOPNOTSUPP; + /* Setting FRE without FR is not supported. */ + if ((value & (PR_FP_MODE_FR | PR_FP_MODE_FRE)) == PR_FP_MODE_FRE) + return -EOPNOTSUPP; + /* Avoid inadvertently triggering emulation */ if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu && !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64)) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0b23b1ad99e6..0c0c23c9c9f5 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -463,7 +463,7 @@ static int fpr_get_msa(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR and FIR registers separately. */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, @@ -471,6 +471,7 @@ static int fpr_get(struct task_struct *target, void *kbuf, void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); int err; if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) @@ -483,6 +484,12 @@ static int fpr_get(struct task_struct *target, err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.fcr31, fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &boot_cpu_data.fpu_id, + fir_pos, fir_pos + sizeof(u32)); return err; } @@ -531,7 +538,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR register separately. Ignore the incoming FIR register + * contents though, as the register is read-only. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -545,6 +553,7 @@ static int fpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); u32 fcr31; int err; @@ -572,6 +581,11 @@ static int fpr_set(struct task_struct *target, ptrace_setfcr31(target, fcr31); } + if (count > 0) + err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + fir_pos, + fir_pos + sizeof(u32)); + return err; } @@ -793,7 +807,7 @@ long arch_ptrace(struct task_struct *child, long request, fregs = get_fpu_regs(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -804,7 +818,7 @@ long arch_ptrace(struct task_struct *child, long request, break; } #endif - tmp = get_fpr32(&fregs[addr - FPR_BASE], 0); + tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; @@ -888,7 +902,7 @@ long arch_ptrace(struct task_struct *child, long request, init_fp_ctx(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 2b9260f92ccd..f30c381d3e1c 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -99,7 +99,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; } fregs = get_fpu_regs(child); - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -109,7 +109,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, addr & 1); break; } - tmp = get_fpr32(&fregs[addr - FPR_BASE], 0); + tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; @@ -212,7 +212,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, sizeof(child->thread.fpu)); child->thread.fpu.fcr31 = 0; } - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2549fdd27ee1..0f725e9cee8f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 72af0c183969..c79e6a565572 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -130,21 +130,6 @@ config LOONGSON_UART_BASE default y depends on EARLY_PRINTK || SERIAL_8250 -config IOMMU_HELPER - bool - -config NEED_SG_DMA_LENGTH - bool - -config SWIOTLB - bool "Soft IOMMU Support for All-Memory DMA" - default y - depends on CPU_LOONGSON3 - select DMA_DIRECT_OPS - select IOMMU_HELPER - select NEED_SG_DMA_LENGTH - select NEED_DMA_MAP_STATE - config PHYS48_TO_HT40 bool default y if CPU_LOONGSON3 diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 6f534b209971..e12dfa48b478 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* * Either no secondary cache or the available caches don't have the * subset property so we have to flush the primary caches - * explicitly + * explicitly. + * If we would need IPI to perform an INDEX-type operation, then + * we have to use the HIT-type alternative as IPI cannot be used + * here due to interrupts possibly being disabled. */ - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) return; } - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index dcafa43613b6..f9fef0028ca2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -402,13 +402,3 @@ static const struct dma_map_ops mips_default_dma_map_ops = { const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops; EXPORT_SYMBOL(mips_dma_map_ops); - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init mips_dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(mips_dma_init); diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig index 7fcfc7fe9f14..412351c5acc6 100644 --- a/arch/mips/netlogic/Kconfig +++ b/arch/mips/netlogic/Kconfig @@ -83,10 +83,4 @@ endif config NLM_COMMON bool -config IOMMU_HELPER - bool - -config NEED_SG_DMA_LENGTH - bool - endif diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c index dd2d9f7e9412..7649372103af 100644 --- a/arch/mips/pci/ops-pmcmsp.c +++ b/arch/mips/pci/ops-pmcmsp.c @@ -83,18 +83,6 @@ static int show_msp_pci_counts(struct seq_file *m, void *v) return 0; } -static int msp_pci_rd_cnt_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_msp_pci_counts, NULL); -} - -static const struct file_operations msp_pci_rd_cnt_fops = { - .open = msp_pci_rd_cnt_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /***************************************************************************** * * FUNCTION: gen_pci_cfg_wr_show @@ -160,18 +148,6 @@ static int gen_pci_cfg_wr_show(struct seq_file *m, void *v) return 0; } -static int gen_pci_cfg_wr_open(struct inode *inode, struct file *file) -{ - return single_open(file, gen_pci_cfg_wr_show, NULL); -} - -static const struct file_operations gen_pci_cfg_wr_fops = { - .open = gen_pci_cfg_wr_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /***************************************************************************** * * FUNCTION: pci_proc_init @@ -188,8 +164,8 @@ static const struct file_operations gen_pci_cfg_wr_fops = { ****************************************************************************/ static void pci_proc_init(void) { - proc_create("pmc_msp_pci_rd_cnt", 0, NULL, &msp_pci_rd_cnt_fops); - proc_create("pmc_msp_pci_cfg_wr", 0, NULL, &gen_pci_cfg_wr_fops); + proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts); + proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show); } #endif /* CONFIG_PROC_FS && PCI_COUNTERS */ diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c index a4e55999ecb4..4bb85de9229b 100644 --- a/arch/mips/sibyte/common/bus_watcher.c +++ b/arch/mips/sibyte/common/bus_watcher.c @@ -142,24 +142,12 @@ static int bw_proc_show(struct seq_file *m, void *v) return 0; } -static int bw_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, bw_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations bw_proc_fops = { - .open = bw_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void create_proc_decoder(struct bw_stats_struct *stats) { struct proc_dir_entry *ent; - ent = proc_create_data("bus_watcher", S_IWUSR | S_IRUGO, NULL, - &bw_proc_fops, stats); + ent = proc_create_single_data("bus_watcher", S_IWUSR | S_IRUGO, NULL, + bw_proc_show, stats); if (!ent) { printk(KERN_INFO "Unable to initialize bus_watcher /proc entry\n"); return; diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 249f38d3388f..6aed974276d8 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -5,10 +5,19 @@ config NDS32 def_bool y + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_WANT_FRAME_POINTERS if FTRACE select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK + select DMA_NONCOHERENT_OPS + select GENERIC_ASHLDI3 + select GENERIC_ASHRDI3 + select GENERIC_LSHRDI3 + select GENERIC_CMPDI2 + select GENERIC_MULDI3 + select GENERIC_UCMPDI2 select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS @@ -82,6 +91,7 @@ endmenu menu "Kernel Features" source "kernel/Kconfig.preempt" +source "kernel/Kconfig.freezer" source "mm/Kconfig" source "kernel/Kconfig.hz" endmenu diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index ba44cc539da9..b8c8984d1456 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -1,10 +1,11 @@ comment "Processor Features" config CPU_BIG_ENDIAN - bool "Big endian" + def_bool !CPU_LITTLE_ENDIAN config CPU_LITTLE_ENDIAN - def_bool !CPU_BIG_ENDIAN + bool "Little endian" + default y config HWZOL bool "hardware zero overhead loop support" diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 91f933d5a962..513bb2e9baf9 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -23,9 +23,6 @@ export TEXTADDR # If we have a machine-specific directory, then include it in the build. core-y += arch/nds32/kernel/ arch/nds32/mm/ libs-y += arch/nds32/lib/ -LIBGCC_PATH := \ - $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) -libs-y += $(LIBGCC_PATH) ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""' BUILTIN_DTB := y @@ -35,8 +32,12 @@ endif ifdef CONFIG_CPU_LITTLE_ENDIAN KBUILD_CFLAGS += $(call cc-option, -EL) +KBUILD_AFLAGS += $(call cc-option, -EL) +LDFLAGS += $(call cc-option, -EL) else KBUILD_CFLAGS += $(call cc-option, -EB) +KBUILD_AFLAGS += $(call cc-option, -EB) +LDFLAGS += $(call cc-option, -EB) endif boot := arch/nds32/boot diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index 06bdf8167f5a..6f5cc29ed07f 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -13,9 +13,11 @@ generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += dma.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h +generic-y += export.h generic-y += fb.h generic-y += fcntl.h generic-y += ftrace.h @@ -49,6 +51,7 @@ generic-y += switch_to.h generic-y += timex.h generic-y += topology.h generic-y += trace_clock.h +generic-y += xor.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index c73f71d67744..8e84fc385b94 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -336,7 +336,7 @@ #define INT_MASK_mskIDIVZE ( 0x1 << INT_MASK_offIDIVZE ) #define INT_MASK_mskDSSIM ( 0x1 << INT_MASK_offDSSIM ) -#define INT_MASK_INITAIAL_VAL 0x10003 +#define INT_MASK_INITAIAL_VAL (INT_MASK_mskDSSIM|INT_MASK_mskIDIVZE) /****************************************************************************** * ir15: INT_PEND (Interrupt Pending Register) @@ -396,6 +396,7 @@ #define MMU_CTL_D8KB 1 #define MMU_CTL_UNA ( 0x1 << MMU_CTL_offUNA ) +#define MMU_CTL_CACHEABLE_NON 0 #define MMU_CTL_CACHEABLE_WB 2 #define MMU_CTL_CACHEABLE_WT 3 diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 1240f148ec0f..10b48f0d8e85 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -32,6 +32,8 @@ void flush_anon_page(struct vm_area_struct *vma, #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page(struct page *page); +void flush_kernel_vmap_range(void *addr, int size); +void invalidate_kernel_vmap_range(void *addr, int size); void flush_icache_range(unsigned long start, unsigned long end); void flush_icache_page(struct vm_area_struct *vma, struct page *page); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/include/asm/dma-mapping.h b/arch/nds32/include/asm/dma-mapping.h deleted file mode 100644 index 2dd47d245c25..000000000000 --- a/arch/nds32/include/asm/dma-mapping.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2005-2017 Andes Technology Corporation - -#ifndef ASMNDS32_DMA_MAPPING_H -#define ASMNDS32_DMA_MAPPING_H - -extern struct dma_map_ops nds32_dma_ops; - -static inline struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &nds32_dma_ops; -} - -#endif diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h index 966e71b3c960..71cd226d6863 100644 --- a/arch/nds32/include/asm/io.h +++ b/arch/nds32/include/asm/io.h @@ -4,6 +4,8 @@ #ifndef __ASM_NDS32_IO_H #define __ASM_NDS32_IO_H +#include <linux/types.h> + extern void iounmap(volatile void __iomem *addr); #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index e27365c097b6..947f0491c9a7 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -27,6 +27,9 @@ extern void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma); extern void clear_user_highpage(struct page *page, unsigned long vaddr); +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *to); +void clear_user_page(void *addr, unsigned long vaddr, struct page *page); #define __HAVE_ARCH_COPY_USER_HIGHPAGE #define clear_user_highpage clear_user_highpage #else diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6783937edbeb..d3e19a55cf53 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -152,6 +152,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_CACHE_L1 __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE) #define PAGE_MEMORY __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD) #define PAGE_KERNEL __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD) +#define PAGE_SHARED __pgprot(_PAGE_V | _PAGE_M_URW_KRW | _PAGE_D | _PAGE_CACHE_SHRD) #define PAGE_DEVICE __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV) #endif /* __ASSEMBLY__ */ diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index d291800fc621..d0dbd4fe9645 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -3,17 +3,14 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/export.h> #include <linux/string.h> -#include <linux/scatterlist.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/io.h> #include <linux/cache.h> #include <linux/highmem.h> #include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/dma-mapping.h> #include <asm/proc-fns.h> /* @@ -22,11 +19,6 @@ static pte_t *consistent_pte; static DEFINE_RAW_SPINLOCK(consistent_lock); -enum master_type { - FOR_CPU = 0, - FOR_DEVICE = 1, -}; - /* * VM region handling support. * @@ -124,10 +116,8 @@ out: return c; } -/* FIXME: attrs is not used. */ -static void *nds32_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * handle, gfp_t gfp, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs) { struct page *page; struct arch_vm_region *c; @@ -232,8 +222,8 @@ no_page: return NULL; } -static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs) { struct arch_vm_region *c; unsigned long flags, addr; @@ -333,145 +323,69 @@ static int __init consistent_init(void) } core_initcall(consistent_init); -static void consistent_sync(void *vaddr, size_t size, int direction, int master_type); -static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - consistent_sync((void *)(page_address(page) + offset), size, dir, FOR_DEVICE); - return page_to_phys(page) + offset; -} - -static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU); -} - -/* - * Make an area consistent for devices. - */ -static void consistent_sync(void *vaddr, size_t size, int direction, int master_type) -{ - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; - - if (master_type == FOR_CPU) { - switch (direction) { - case DMA_TO_DEVICE: - break; - case DMA_FROM_DEVICE: - case DMA_BIDIRECTIONAL: - cpu_dma_inval_range(start, end); - break; - default: - BUG(); - } - } else { - /* FOR_DEVICE */ - switch (direction) { - case DMA_FROM_DEVICE: - break; - case DMA_TO_DEVICE: - case DMA_BIDIRECTIONAL: - cpu_dma_wb_range(start, end); - break; - default: - BUG(); - } - } -} -static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) +static inline void cache_op(phys_addr_t paddr, size_t size, + void (*fn)(unsigned long start, unsigned long end)) { - int i; + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned offset = paddr & ~PAGE_MASK; + size_t left = size; + unsigned long start; - for (i = 0; i < nents; i++, sg++) { - void *virt; - unsigned long pfn; - struct page *page = sg_page(sg); + do { + size_t len = left; - sg->dma_address = sg_phys(sg); - pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE; - page = pfn_to_page(pfn); if (PageHighMem(page)) { - virt = kmap_atomic(page); - consistent_sync(virt, sg->length, dir, FOR_CPU); - kunmap_atomic(virt); + void *addr; + + if (offset + len > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + } + len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + start = (unsigned long)(addr + offset); + fn(start, start + len); + kunmap_atomic(addr); } else { - if (sg->offset > PAGE_SIZE) - panic("sg->offset:%08x > PAGE_SIZE\n", - sg->offset); - virt = page_address(page) + sg->offset; - consistent_sync(virt, sg->length, dir, FOR_CPU); + start = (unsigned long)phys_to_virt(paddr); + fn(start, start + size); } - } - return nents; + offset = 0; + page++; + left -= len; + } while (left); } -static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nhwentries, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { -} - -static void -nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU); -} - -static void -nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE); -} - -static void -nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) -{ - int i; - - for (i = 0; i < nents; i++, sg++) { - char *virt = - page_address((struct page *)sg->page_link) + sg->offset; - consistent_sync(virt, sg->length, dir, FOR_CPU); + switch (dir) { + case DMA_FROM_DEVICE: + break; + case DMA_TO_DEVICE: + case DMA_BIDIRECTIONAL: + cache_op(paddr, size, cpu_dma_wb_range); + break; + default: + BUG(); } } -static void -nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - int i; - - for (i = 0; i < nents; i++, sg++) { - char *virt = - page_address((struct page *)sg->page_link) + sg->offset; - consistent_sync(virt, sg->length, dir, FOR_DEVICE); + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + cache_op(paddr, size, cpu_dma_inval_range); + break; + default: + BUG(); } } - -struct dma_map_ops nds32_dma_ops = { - .alloc = nds32_dma_alloc_coherent, - .free = nds32_dma_free, - .map_page = nds32_dma_map_page, - .unmap_page = nds32_dma_unmap_page, - .map_sg = nds32_dma_map_sg, - .unmap_sg = nds32_dma_unmap_sg, - .sync_single_for_device = nds32_dma_sync_single_for_device, - .sync_single_for_cpu = nds32_dma_sync_single_for_cpu, - .sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu, - .sync_sg_for_device = nds32_dma_sync_sg_for_device, -}; - -EXPORT_SYMBOL(nds32_dma_ops); diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S index a72e83d804f5..b8ae4e9a6b93 100644 --- a/arch/nds32/kernel/ex-entry.S +++ b/arch/nds32/kernel/ex-entry.S @@ -118,7 +118,7 @@ common_exception_handler: /* interrupt */ 2: #ifdef CONFIG_TRACE_IRQFLAGS - jal arch_trace_hardirqs_off + jal trace_hardirqs_off #endif move $r0, $sp sethi $lp, hi20(ret_from_intr) diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S index 71f57bd70f3b..c5fdae174ced 100644 --- a/arch/nds32/kernel/head.S +++ b/arch/nds32/kernel/head.S @@ -57,14 +57,32 @@ _nodtb: isb mtsr $r4, $L1_PPTB ! load page table pointer\n" -/* set NTC0 cacheable/writeback, mutliple page size in use */ +#ifdef CONFIG_CPU_DCACHE_DISABLE + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_NON +#else + #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WT + #else + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WB + #endif +#endif + +/* set NTC cacheability, mutliple page size in use */ mfsr $r3, $MMU_CTL - li $r0, #~MMU_CTL_mskNTC0 - and $r3, $r3, $r0 +#if CONFIG_MEMORY_START >= 0xc0000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC3) +#elif CONFIG_MEMORY_START >= 0x80000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC2) +#elif CONFIG_MEMORY_START >= 0x40000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC1) +#else + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC0) +#endif + #ifdef CONFIG_ANDES_PAGE_SIZE_4KB - ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)) + ori $r3, $r3, #(MMU_CTL_mskMPZIU) #else - ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)|MMU_CTL_D8KB) + ori $r3, $r3, #(MMU_CTL_mskMPZIU|MMU_CTL_D8KB) #endif #ifdef CONFIG_HW_SUPPORT_UNALIGNMENT_ACCESS li $r0, #MMU_CTL_UNA diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index ba910e9e4ecb..2f5b2ccebe47 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -293,6 +293,9 @@ void __init setup_arch(char **cmdline_p) /* paging_init() sets up the MMU and marks all pages as reserved */ paging_init(); + /* invalidate all TLB entries because the new mapping is created */ + __nds32__tlbop_flua(); + /* use generic way to parse */ parse_early_param(); diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c index bc70113c0e84..8b231e910ea6 100644 --- a/arch/nds32/kernel/stacktrace.c +++ b/arch/nds32/kernel/stacktrace.c @@ -9,6 +9,7 @@ void save_stack_trace(struct stack_trace *trace) { save_stack_trace_tsk(current, trace); } +EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { @@ -45,3 +46,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) fpn = (unsigned long *)fpp; } } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c index f1198d7a5654..016f15891f6d 100644 --- a/arch/nds32/kernel/vdso.c +++ b/arch/nds32/kernel/vdso.c @@ -23,7 +23,7 @@ #include <asm/vdso_timer_info.h> #include <asm/cache_info.h> extern struct cache_info L1_cache_info[2]; -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; static unsigned long timer_mapping_base; @@ -66,16 +66,16 @@ static int __init vdso_init(void) int i; struct page **vdso_pagelist; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } /* Creat a timer io mapping to get clock cycles counter */ get_timer_node_info(); - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist */ vdso_pagelist = kcalloc(vdso_pages, sizeof(struct page *), GFP_KERNEL); @@ -83,7 +83,7 @@ static int __init vdso_init(void) return -ENOMEM; for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i] = virt_to_page(vdso_start + i * PAGE_SIZE); vdso_spec[1].pages = &vdso_pagelist[0]; return 0; diff --git a/arch/nds32/lib/copy_page.S b/arch/nds32/lib/copy_page.S index 4a2ff85f17ee..f8701ed161a8 100644 --- a/arch/nds32/lib/copy_page.S +++ b/arch/nds32/lib/copy_page.S @@ -2,6 +2,7 @@ // Copyright (C) 2005-2017 Andes Technology Corporation #include <linux/linkage.h> +#include <asm/export.h> #include <asm/page.h> .text @@ -16,6 +17,7 @@ ENTRY(copy_page) popm $r2, $r10 ret ENDPROC(copy_page) +EXPORT_SYMBOL(copy_page) ENTRY(clear_page) pushm $r1, $r9 @@ -35,3 +37,4 @@ ENTRY(clear_page) popm $r1, $r9 ret ENDPROC(clear_page) +EXPORT_SYMBOL(clear_page) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index b96a01b10ca7..e1aed9dc692d 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -19,7 +19,7 @@ #define RA(inst) (((inst) >> 15) & 0x1FUL) #define RB(inst) (((inst) >> 10) & 0x1FUL) #define SV(inst) (((inst) >> 8) & 0x3UL) -#define IMM(inst) (((inst) >> 0) & 0x3FFFUL) +#define IMM(inst) (((inst) >> 0) & 0x7FFFUL) #define RA3(inst) (((inst) >> 3) & 0x7UL) #define RT3(inst) (((inst) >> 6) & 0x7UL) @@ -28,6 +28,9 @@ #define RA5(inst) (((inst) >> 0) & 0x1FUL) #define RT4(inst) (((inst) >> 5) & 0xFUL) +#define GET_IMMSVAL(imm_value) \ + (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value) + #define __get8_data(val,addr,err) \ __asm__( \ "1: lbi.bi %1, [%2], #1\n" \ @@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) } if (imm) - shift = IMM(inst) * len; + shift = GET_IMMSVAL(IMM(inst)) * len; else shift = *idx_to_addr(regs, RB(inst)) << SV(inst); @@ -552,7 +555,7 @@ static struct ctl_table alignment_tbl[3] = { static struct ctl_table nds32_sysctl_table[2] = { { - .procname = "unaligned_acess", + .procname = "unaligned_access", .mode = 0555, .child = alignment_tbl}, {} diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 6eb786a399a2..ce8fd34497bf 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -147,6 +147,25 @@ void flush_cache_vunmap(unsigned long start, unsigned long end) cpu_icache_inval_all(); } +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *to) +{ + cpu_dcache_wbinval_page((unsigned long)vaddr); + cpu_icache_inval_page((unsigned long)vaddr); + copy_page(vto, vfrom); + cpu_dcache_wbinval_page((unsigned long)vto); + cpu_icache_inval_page((unsigned long)vto); +} + +void clear_user_page(void *addr, unsigned long vaddr, struct page *page) +{ + cpu_dcache_wbinval_page((unsigned long)vaddr); + cpu_icache_inval_page((unsigned long)vaddr); + clear_page(addr); + cpu_dcache_wbinval_page((unsigned long)addr); + cpu_icache_inval_page((unsigned long)addr); +} + void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { @@ -156,11 +175,9 @@ void copy_user_highpage(struct page *to, struct page *from, pto = page_to_phys(to); pfrom = page_to_phys(from); + local_irq_save(flags); if (aliasing(vaddr, (unsigned long)kfrom)) cpu_dcache_wb_page((unsigned long)kfrom); - if (aliasing(vaddr, (unsigned long)kto)) - cpu_dcache_inval_page((unsigned long)kto); - local_irq_save(flags); vto = kremap0(vaddr, pto); vfrom = kremap1(vaddr, pfrom); copy_page((void *)vto, (void *)vfrom); @@ -198,21 +215,25 @@ void flush_dcache_page(struct page *page) if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else { - int i, pc; - unsigned long vto, kaddr, flags; + unsigned long kaddr, flags; + kaddr = (unsigned long)page_address(page); - cpu_dcache_wbinval_page(kaddr); - pc = CACHE_SET(DCACHE) * CACHE_LINE_SIZE(DCACHE) / PAGE_SIZE; local_irq_save(flags); - for (i = 0; i < pc; i++) { - vto = - kremap0(kaddr + i * PAGE_SIZE, page_to_phys(page)); - cpu_dcache_wbinval_page(vto); - kunmap01(vto); + cpu_dcache_wbinval_page(kaddr); + if (mapping) { + unsigned long vaddr, kto; + + vaddr = page->index << PAGE_SHIFT; + if (aliasing(vaddr, kaddr)) { + kto = kremap0(vaddr, page_to_phys(page)); + cpu_dcache_wbinval_page(kto); + kunmap01(kto); + } } local_irq_restore(flags); } } +EXPORT_SYMBOL(flush_dcache_page); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len) @@ -251,7 +272,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr) { - unsigned long flags; + unsigned long kaddr, flags, ktmp; if (!PageAnon(page)) return; @@ -261,7 +282,12 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_save(flags); if (vma->vm_flags & VM_EXEC) cpu_icache_inval_page(vaddr & PAGE_MASK); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); + kaddr = (unsigned long)page_address(page); + if (aliasing(vaddr, kaddr)) { + ktmp = kremap0(vaddr, page_to_phys(page)); + cpu_dcache_wbinval_page(ktmp); + kunmap01(ktmp); + } local_irq_restore(flags); } @@ -272,6 +298,25 @@ void flush_kernel_dcache_page(struct page *page) cpu_dcache_wbinval_page((unsigned long)page_address(page)); local_irq_restore(flags); } +EXPORT_SYMBOL(flush_kernel_dcache_page); + +void flush_kernel_vmap_range(void *addr, int size) +{ + unsigned long flags; + local_irq_save(flags); + cpu_dcache_wb_range((unsigned long)addr, (unsigned long)addr + size); + local_irq_restore(flags); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *addr, int size) +{ + unsigned long flags; + local_irq_save(flags); + cpu_dcache_inval_range((unsigned long)addr, (unsigned long)addr + size); + local_irq_restore(flags); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); void flush_icache_range(unsigned long start, unsigned long end) { @@ -283,6 +328,7 @@ void flush_icache_range(unsigned long start, unsigned long end) cpu_cache_wbinval_range(start, end, 1); local_irq_restore(flags); } +EXPORT_SYMBOL(flush_icache_range); void flush_icache_page(struct vm_area_struct *vma, struct page *page) { diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 93ee0160720b..c713d2ad55dc 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -30,6 +30,7 @@ extern unsigned long phys_initrd_size; * zero-initialized data and COW. */ struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); static void __init zone_sizes_init(void) { diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index a945f00011b4..ec7fd45704d2 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -247,14 +247,3 @@ const struct dma_map_ops or1k_dma_map_ops = { .sync_single_for_device = or1k_sync_single_for_device, }; EXPORT_SYMBOL(or1k_dma_map_ops); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fc5a574c3482..4d8f64d48597 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -51,6 +51,8 @@ config PARISC select GENERIC_CLOCKEVENTS select ARCH_NO_COHERENT_DMA_MMAP select CPU_NO_EFFICIENT_FFS + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH help The PA-RISC microprocessor is designed by Hewlett-Packard and used @@ -111,12 +113,6 @@ config PM config STACKTRACE_SUPPORT def_bool y -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config ISA_DMA_API bool diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 96b7deec512d..3328fd17c19d 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -88,29 +88,6 @@ struct pci_hba_data { #endif /* !CONFIG_64BIT */ /* - * If the PCI device's view of memory is the same as the CPU's view of memory, - * PCI_DMA_BUS_IS_PHYS is true. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#ifdef CONFIG_PA20 -/* All PA-2.0 machines have an IOMMU. */ -#define PCI_DMA_BUS_IS_PHYS 0 -#define parisc_has_iommu() do { } while (0) -#else - -#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA) -extern int parisc_bus_is_phys; /* in arch/parisc/kernel/setup.c */ -#define PCI_DMA_BUS_IS_PHYS parisc_bus_is_phys -#define parisc_has_iommu() do { parisc_bus_is_phys = 0; } while (0) -#else -#define PCI_DMA_BUS_IS_PHYS 1 -#define parisc_has_iommu() do { } while (0) -#endif - -#endif /* !CONFIG_PA20 */ - - -/* ** Most PCI devices (eg Tulip, NCR720) also export the same registers ** to both MMIO and I/O port space. Due to poor performance of I/O Port ** access under HP PCI bus adapters, strongly recommend the use of MMIO diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 91bc0cac03a1..6df07ce4f3c2 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -367,19 +367,6 @@ static int proc_pcxl_dma_show(struct seq_file *m, void *v) return 0; } -static int proc_pcxl_dma_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_pcxl_dma_show, NULL); -} - -static const struct file_operations proc_pcxl_dma_ops = { - .owner = THIS_MODULE, - .open = proc_pcxl_dma_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init pcxl_dma_init(void) { @@ -397,8 +384,8 @@ pcxl_dma_init(void) "pcxl_dma_init: Unable to create gsc /proc dir entry\n"); else { struct proc_dir_entry* ent; - ent = proc_create("pcxl_dma", 0, proc_gsc_root, - &proc_pcxl_dma_ops); + ent = proc_create_single("pcxl_dma", 0, proc_gsc_root, + proc_pcxl_dma_show); if (!ent) printk(KERN_WARNING "pci-dma.c: Unable to create pcxl_dma /proc entry.\n"); diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 3e04242de5a7..28e07482b0f1 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -266,18 +266,6 @@ static int pdc_chassis_warn_show(struct seq_file *m, void *v) return 0; } -static int pdc_chassis_warn_open(struct inode *inode, struct file *file) -{ - return single_open(file, pdc_chassis_warn_show, NULL); -} - -static const struct file_operations pdc_chassis_warn_fops = { - .open = pdc_chassis_warn_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init pdc_chassis_create_procfs(void) { unsigned long test; @@ -292,7 +280,7 @@ static int __init pdc_chassis_create_procfs(void) printk(KERN_INFO "Enabling PDC chassis warnings support v%s\n", PDC_CHASSIS_VER); - proc_create("chassis", 0400, NULL, &pdc_chassis_warn_fops); + proc_create_single("chassis", 0400, NULL, pdc_chassis_warn_show); return 0; } diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 0e9675f857a5..8d3a7b80ac42 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -58,11 +58,6 @@ struct proc_dir_entry * proc_runway_root __read_mostly = NULL; struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL; -#if !defined(CONFIG_PA20) && (defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)) -int parisc_bus_is_phys __read_mostly = 1; /* Assume no IOMMU is present */ -EXPORT_SYMBOL(parisc_bus_is_phys); -#endif - void __init setup_cmdline(char **cmdline_p) { extern unsigned int boot_args[]; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c32a181a7cbb..268fd46fc3c7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -13,12 +13,6 @@ config 64BIT bool default y if PPC64 -config ARCH_PHYS_ADDR_T_64BIT - def_bool PPC64 || PHYS_64BIT - -config ARCH_DMA_ADDR_T_64BIT - def_bool ARCH_PHYS_ADDR_T_64BIT - config MMU bool default y @@ -187,7 +181,6 @@ config PPC select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW - select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL select HAVE_EBPF_JIT if PPC64 @@ -223,9 +216,11 @@ config PPC select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_IRQ_TIME_ACCOUNTING + select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA + select NEED_SG_DMA_LENGTH select NO_BOOTMEM select OF select OF_EARLY_FLATTREE @@ -478,19 +473,6 @@ config MPROFILE_KERNEL depends on PPC64 && CPU_LITTLE_ENDIAN def_bool !DISABLE_MPROFILE_KERNEL -config IOMMU_HELPER - def_bool PPC64 - -config SWIOTLB - bool "SWIOTLB support" - default n - select IOMMU_HELPER - ---help--- - Support for IO bounce buffering for systems without an IOMMU. - This allows us to DMA to the full physical address space on - platforms where the size of a physical address is larger - than the bus address. Not all platforms support this. - config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ @@ -913,9 +895,6 @@ config ZONE_DMA config NEED_DMA_MAP_STATE def_bool (PPC64 || NOT_COHERENT_CACHE) -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA bool depends on ISA_DMA_API diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 471b2274fbeb..c40b4380951c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,27 @@ */ #define EX_R3 EX_DAR +#define STF_ENTRY_BARRIER_SLOT \ + STF_ENTRY_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define STF_EXIT_BARRIER_SLOT \ + STF_EXIT_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop + +/* + * r10 must be free to use, r13 must be paca + */ +#define INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT + /* * Macros for annotating the expected destination of (h)rfid * @@ -90,16 +111,19 @@ rfid #define RFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback @@ -108,21 +132,25 @@ hrfid #define HRFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback @@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ + INTERRUPT_TO_KERNEL; \ SAVE_CTR(r10, area); \ mfcr r9; diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 1e82eb3caabd..a9b64df34e2a 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,6 +187,22 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define STF_ENTRY_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __stf_entry_barrier_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + +#define STF_EXIT_BARRIER_FIXUP_SECTION \ +955: \ + .pushsection __stf_exit_barrier_fixup,"a"; \ + .align 2; \ +956: \ + FTR_ENTRY_OFFSET 955b-956b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -199,6 +215,9 @@ label##3: \ #ifndef __ASSEMBLY__ #include <linux/types.h> +extern long stf_barrier_fallback; +extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; +extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; void apply_feature_fixups(void); diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4c02a7378d06..e7377b73cfec 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -96,6 +96,7 @@ struct kvmppc_vcore { struct kvm_vcpu *runner; struct kvm *kvm; u64 tb_offset; /* guest timebase - host timebase */ + u64 tb_offset_applied; /* timebase offset currently in force */ ulong lpcr; u32 arch_compat; ulong pcr; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 401c62aad5e4..2af9ded80540 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -92,24 +92,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define HAVE_PCI_LEGACY 1 -#ifdef CONFIG_PPC64 - -/* The PCI address space does not equal the physical memory address - * space (we have an IOMMU). The IDE and SCSI device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - -#else /* 32-bit */ - -/* The PCI address space does equal the physical memory - * address space (no IOMMU). The IDE and SCSI device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - -#endif /* CONFIG_PPC64 */ - extern void pcibios_claim_one_bus(struct pci_bus *b); extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fa4d2e1cf772..44989b22383c 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -12,6 +12,17 @@ extern unsigned long powerpc_security_features; extern bool rfi_flush; +/* These are bit flags */ +enum stf_barrier_type { + STF_BARRIER_NONE = 0x1, + STF_BARRIER_FALLBACK = 0x2, + STF_BARRIER_EIEIO = 0x4, + STF_BARRIER_SYNC_ORI = 0x8, +}; + +void setup_stf_barrier(void); +void do_stf_barrier_fixups(enum stf_barrier_type types); + static inline void security_ftr_set(unsigned long feature) { powerpc_security_features |= feature; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6bee65f3cfd3..373dc1d6ef44 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -562,6 +562,7 @@ int main(void) OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); OFFSET(VCORE_KVM, kvmppc_vcore, kvm); OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset); + OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied); OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr); OFFSET(VCORE_PCR, kvmppc_vcore, pcr); OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 3f30c994e931..458b928dbd84 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index da20569de9d4..138157deeadf 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -309,8 +309,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -361,7 +359,6 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8ab51f6ca03a..c904477abaf3 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -101,6 +101,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); + mtspr(SPRN_PCR, 0); } mtspr(SPRN_FSCR, system_registers.fscr); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index bc640e4c5ca5..90bb39b1a23c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1775,18 +1775,6 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } -static int proc_eeh_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_eeh_show, NULL); -} - -static const struct file_operations proc_eeh_operations = { - .open = proc_eeh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #ifdef CONFIG_DEBUG_FS static int eeh_enable_dbgfs_set(void *data, u64 val) { @@ -1828,7 +1816,7 @@ DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, static int __init eeh_init_proc(void) { if (machine_is(pseries) || machine_is(powernv)) { - proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); + proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); #ifdef CONFIG_DEBUG_FS debugfs_create_file("eeh_enable", 0600, powerpc_debugfs_root, NULL, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae6a849db60b..f283958129f2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtctr r13; \ GET_PACA(r13); \ std r10,PACA_EXGEN+EX_R10(r13); \ + INTERRUPT_TO_KERNEL; \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ HMT_MEDIUM; \ mfctr r9; @@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) #define SYSCALL_KVMTEST \ HMT_MEDIUM; \ mr r9,r13; \ - GET_PACA(r13); + GET_PACA(r13); \ + INTERRUPT_TO_KERNEL; #endif #define LOAD_SYSCALL_HANDLER(reg) \ @@ -1507,6 +1509,19 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(stf_barrier_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + sync + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ori 31,31,0 + .rept 14 + b 1f +1: + .endr + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index fb070d8cad07..d49063d0baa4 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -154,18 +154,6 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file, static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v); static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v); -static int sensors_open(struct inode *inode, struct file *file) -{ - return single_open(file, ppc_rtas_sensors_show, NULL); -} - -static const struct file_operations ppc_rtas_sensors_operations = { - .open = sensors_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int poweron_open(struct inode *inode, struct file *file) { return single_open(file, ppc_rtas_poweron_show, NULL); @@ -231,18 +219,6 @@ static const struct file_operations ppc_rtas_tone_volume_operations = { .release = single_release, }; -static int rmo_buf_open(struct inode *inode, struct file *file) -{ - return single_open(file, ppc_rtas_rmo_buf_show, NULL); -} - -static const struct file_operations ppc_rtas_rmo_buf_ops = { - .open = rmo_buf_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ppc_rtas_find_all_sensors(void); static void ppc_rtas_process_sensor(struct seq_file *m, struct individual_sensor *s, int state, int error, const char *loc); @@ -267,14 +243,14 @@ static int __init proc_rtas_init(void) &ppc_rtas_clock_operations); proc_create("powerpc/rtas/poweron", 0644, NULL, &ppc_rtas_poweron_operations); - proc_create("powerpc/rtas/sensors", 0444, NULL, - &ppc_rtas_sensors_operations); + proc_create_single("powerpc/rtas/sensors", 0444, NULL, + ppc_rtas_sensors_show); proc_create("powerpc/rtas/frequency", 0644, NULL, &ppc_rtas_tone_freq_operations); proc_create("powerpc/rtas/volume", 0644, NULL, &ppc_rtas_tone_volume_operations); - proc_create("powerpc/rtas/rmo_buffer", 0400, NULL, - &ppc_rtas_rmo_buf_ops); + proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL, + ppc_rtas_rmo_buf_show); return 0; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bab5a27ea805..b98a722da915 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/seq_buf.h> +#include <asm/debugfs.h> #include <asm/security_features.h> @@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c return s.len; } + +/* + * Store-forwarding barrier support. + */ + +static enum stf_barrier_type stf_enabled_flush_types; +static bool no_stf_barrier; +bool stf_barrier; + +static int __init handle_no_stf_barrier(char *p) +{ + pr_info("stf-barrier: disabled on command line."); + no_stf_barrier = true; + return 0; +} + +early_param("no_stf_barrier", handle_no_stf_barrier); + +/* This is the generic flag used by other architectures */ +static int __init handle_ssbd(char *p) +{ + if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) { + /* Until firmware tells us, we have the barrier with auto */ + return 0; + } else if (strncmp(p, "off", 3) == 0) { + handle_no_stf_barrier(NULL); + return 0; + } else + return 1; + + return 0; +} +early_param("spec_store_bypass_disable", handle_ssbd); + +/* This is the generic flag used by other architectures */ +static int __init handle_no_ssbd(char *p) +{ + handle_no_stf_barrier(NULL); + return 0; +} +early_param("nospec_store_bypass_disable", handle_no_ssbd); + +static void stf_barrier_enable(bool enable) +{ + if (enable) + do_stf_barrier_fixups(stf_enabled_flush_types); + else + do_stf_barrier_fixups(STF_BARRIER_NONE); + + stf_barrier = enable; +} + +void setup_stf_barrier(void) +{ + enum stf_barrier_type type; + bool enable, hv; + + hv = cpu_has_feature(CPU_FTR_HVMODE); + + /* Default to fallback in case fw-features are not available */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + type = STF_BARRIER_EIEIO; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + type = STF_BARRIER_SYNC_ORI; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + type = STF_BARRIER_FALLBACK; + else + type = STF_BARRIER_NONE; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + + if (type == STF_BARRIER_FALLBACK) { + pr_info("stf-barrier: fallback barrier available\n"); + } else if (type == STF_BARRIER_SYNC_ORI) { + pr_info("stf-barrier: hwsync barrier available\n"); + } else if (type == STF_BARRIER_EIEIO) { + pr_info("stf-barrier: eieio barrier available\n"); + } + + stf_enabled_flush_types = type; + + if (!no_stf_barrier) + stf_barrier_enable(enable); +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) { + const char *type; + switch (stf_enabled_flush_types) { + case STF_BARRIER_EIEIO: + type = "eieio"; + break; + case STF_BARRIER_SYNC_ORI: + type = "hwsync"; + break; + case STF_BARRIER_FALLBACK: + type = "fallback"; + break; + default: + type = "unknown"; + } + return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type); + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +#ifdef CONFIG_DEBUG_FS +static int stf_barrier_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != stf_barrier) + stf_barrier_enable(enable); + + return 0; +} + +static int stf_barrier_get(void *data, u64 *val) +{ + *val = stf_barrier ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); + +static __init int stf_barrier_debugfs_init(void) +{ + debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + return 0; +} +device_initcall(stf_barrier_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c8af90ff49f0..b8d82678f8b4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -134,6 +134,20 @@ SECTIONS #ifdef CONFIG_PPC64 . = ALIGN(8); + __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { + __start___stf_entry_barrier_fixup = .; + *(__stf_entry_barrier_fixup) + __stop___stf_entry_barrier_fixup = .; + } + + . = ALIGN(8); + __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { + __start___stf_exit_barrier_fixup = .; + *(__stf_exit_barrier_fixup) + __stop___stf_exit_barrier_fixup = .; + } + + . = ALIGN(8); __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { __start___rfi_flush_fixup = .; *(__rfi_flush_fixup) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index a57eafec4dc2..361f42c8c73e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - asm volatile("ptesync": : :"memory"); + asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) @@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) /* RIC=1 PRS=0 R=1 IS=2 */ asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1) : : "r" (rb), "r" (kvm->arch.lpid) : "memory"); - asm volatile("ptesync": : :"memory"); + asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, @@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { - old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, + old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4d07fca5121c..9963f65c212b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc) vc->in_guest = 0; vc->napping_threads = 0; vc->conferring_threads = 0; + vc->tb_offset_applied = 0; } static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bd63fa8a08b5..07ca1b2a7966 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 22: ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 beq 37f + std r8, VCORE_TB_OFFSET_APPL(r5) mftb r6 /* current host timebase */ add r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ @@ -940,18 +941,6 @@ FTR_SECTION_ELSE ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: - /* - * Set the decrementer to the guest decrementer. - */ - ld r8,VCPU_DEC_EXPIRES(r4) - /* r8 is a host timebase value here, convert to guest TB */ - ld r5,HSTATE_KVM_VCORE(r13) - ld r6,VCORE_TB_OFFSET(r5) - add r8,r8,r6 - mftb r7 - subf r3,r7,r8 - mtspr SPRN_DEC,r3 - ld r5, VCPU_SPRG0(r4) ld r6, VCPU_SPRG1(r4) ld r7, VCPU_SPRG2(r4) @@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r8 isync + /* + * Set the decrementer to the guest decrementer. + */ + ld r8,VCPU_DEC_EXPIRES(r4) + /* r8 is a host timebase value here, convert to guest TB */ + ld r5,HSTATE_KVM_VCORE(r13) + ld r6,VCORE_TB_OFFSET_APPL(r5) + add r8,r8,r6 + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC EXTEND_HDEC(r3) @@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) guest_bypass: stw r12, STACK_SLOT_TRAP(r1) - mr r3, r12 + + /* Save DEC */ + /* Do this before kvmhv_commence_exit so we know TB is guest TB */ + ld r3, HSTATE_KVM_VCORE(r13) + mfspr r5,SPRN_DEC + mftb r6 + /* On P9, if the guest has large decr enabled, don't sign extend */ +BEGIN_FTR_SECTION + ld r4, VCORE_LPCR(r3) + andis. r4, r4, LPCR_LD@h + bne 16f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + extsw r5,r5 +16: add r5,r5,r6 + /* r5 is a guest timebase value here, convert to host TB */ + ld r4,VCORE_TB_OFFSET_APPL(r3) + subf r5,r4,r5 + std r5,VCPU_DEC_EXPIRES(r9) + /* Increment exit count, poke other threads to exit */ + mr r3, r12 bl kvmhv_commence_exit nop ld r9, HSTATE_KVM_VCPU(r13) @@ -1639,23 +1659,6 @@ guest_bypass: mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 - /* Save DEC */ - ld r3, HSTATE_KVM_VCORE(r13) - mfspr r5,SPRN_DEC - mftb r6 - /* On P9, if the guest has large decr enabled, don't sign extend */ -BEGIN_FTR_SECTION - ld r4, VCORE_LPCR(r3) - andis. r4, r4, LPCR_LD@h - bne 16f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - extsw r5,r5 -16: add r5,r5,r6 - /* r5 is a guest timebase value here, convert to host TB */ - ld r4,VCORE_TB_OFFSET(r3) - subf r5,r4,r5 - std r5,VCPU_DEC_EXPIRES(r9) - BEGIN_FTR_SECTION b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) @@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) cmpwi cr2, r0, 0 beq cr2, 4f + /* + * Radix: do eieio; tlbsync; ptesync sequence in case we + * interrupted the guest between a tlbie and a ptesync. + */ + eieio + tlbsync + ptesync + /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) lwz r3, VCPU_GUEST_PID(r9) @@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 27: /* Subtract timebase offset from timebase */ - ld r8,VCORE_TB_OFFSET(r5) + ld r8, VCORE_TB_OFFSET_APPL(r5) cmpdi r8,0 beq 17f + li r0, 0 + std r0, VCORE_TB_OFFSET_APPL(r5) mftb r6 /* current guest timebase */ subf r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ @@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET(r5) + ld r6, VCORE_TB_OFFSET_APPL(r5) subf r3, r6, r3 /* convert to host TB value */ std r3, VCPU_DEC_EXPIRES(r4) @@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* Restore guest decrementer */ ld r3, VCPU_DEC_EXPIRES(r4) ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET(r5) + ld r6, VCORE_TB_OFFSET_APPL(r5) add r3, r3, r6 /* convert host TB to guest TB value */ mftb r7 subf r3, r7, r3 @@ -3606,12 +3619,9 @@ kvmppc_fix_pmao: */ kvmhv_start_timing: ld r5, HSTATE_KVM_VCORE(r13) - lbz r6, VCORE_IN_GUEST(r5) - cmpwi r6, 0 - beq 5f /* if in guest, need to */ - ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ -5: mftb r5 - subf r5, r6, r5 + ld r6, VCORE_TB_OFFSET_APPL(r5) + mftb r5 + subf r5, r6, r5 /* subtract current timebase offset */ std r3, VCPU_CUR_ACTIVITY(r4) std r5, VCPU_ACTIVITY_START(r4) blr @@ -3622,15 +3632,12 @@ kvmhv_start_timing: */ kvmhv_accumulate_time: ld r5, HSTATE_KVM_VCORE(r13) - lbz r8, VCORE_IN_GUEST(r5) - cmpwi r8, 0 - beq 4f /* if in guest, need to */ - ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ -4: ld r5, VCPU_CUR_ACTIVITY(r4) + ld r8, VCORE_TB_OFFSET_APPL(r5) + ld r5, VCPU_CUR_ACTIVITY(r4) ld r6, VCPU_ACTIVITY_START(r4) std r3, VCPU_CUR_ACTIVITY(r4) mftb r7 - subf r7, r8, r7 + subf r7, r8, r7 /* subtract current timebase offset */ std r7, VCPU_ACTIVITY_START(r4) cmpdi r5, 0 beqlr diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index c7a5deadd1cc..99c3620b40d9 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -11,6 +11,9 @@ #define XGLUE(a,b) a##b #define GLUE(a,b) XGLUE(a,b) +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */ +#define XICS_DUMMY 1 + static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) { u8 cppr; @@ -205,6 +208,10 @@ skip_ipi: goto skip_ipi; } + /* If it's the dummy interrupt, continue searching */ + if (hirq == XICS_DUMMY) + goto skip_ipi; + /* If fetching, update queue pointers */ if (scan_type == scan_fetch) { q->idx = idx; @@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc) __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING); } +static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive, + struct kvmppc_xive_vcpu *xc) +{ + unsigned int prio; + + /* For each priority that is now masked */ + for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + struct xive_q *q = &xc->queues[prio]; + struct kvmppc_xive_irq_state *state; + struct kvmppc_xive_src_block *sb; + u32 idx, toggle, entry, irq, hw_num; + struct xive_irq_data *xd; + __be32 *qpage; + u16 src; + + idx = q->idx; + toggle = q->toggle; + qpage = READ_ONCE(q->qpage); + if (!qpage) + continue; + + /* For each interrupt in the queue */ + for (;;) { + entry = be32_to_cpup(qpage + idx); + + /* No more ? */ + if ((entry >> 31) == toggle) + break; + irq = entry & 0x7fffffff; + + /* Skip dummies and IPIs */ + if (irq == XICS_DUMMY || irq == XICS_IPI) + goto next; + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + goto next; + state = &sb->irq_state[src]; + + /* Has it been rerouted ? */ + if (xc->server_num == state->act_server) + goto next; + + /* + * Allright, it *has* been re-routed, kill it from + * the queue. + */ + qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY); + + /* Find the HW interrupt */ + kvmppc_xive_select_irq(state, &hw_num, &xd); + + /* If it's not an LSI, set PQ to 11 the EOI will force a resend */ + if (!(xd->flags & XIVE_IRQ_FLAG_LSI)) + GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11); + + /* EOI the source */ + GLUE(X_PFX,source_eoi)(hw_num, xd); + + next: + idx = (idx + 1) & q->msk; + if (idx == 0) + toggle ^= 1; + } + } +} + X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; u8 old_cppr; pr_devel("H_CPPR(cppr=%ld)\n", cppr); @@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) */ smp_mb(); - /* - * We are masking less, we need to look for pending things - * to deliver and set VP pending bits accordingly to trigger - * a new interrupt otherwise we might miss MFRR changes for - * which we have optimized out sending an IPI signal. - */ - if (cppr > old_cppr) + if (cppr > old_cppr) { + /* + * We are masking less, we need to look for pending things + * to deliver and set VP pending bits accordingly to trigger + * a new interrupt otherwise we might miss MFRR changes for + * which we have optimized out sending an IPI signal. + */ GLUE(X_PFX,push_pending_to_hw)(xc); + } else { + /* + * We are masking more, we need to check the queue for any + * interrupt that has been routed to another CPU, take + * it out (replace it with the dummy) and retrigger it. + * + * This is necessary since those interrupts may otherwise + * never be processed, at least not until this CPU restores + * its CPPR. + * + * This is in theory racy vs. HW adding new interrupts to + * the queue. In practice this works because the interesting + * cases are when the guest has done a set_xive() to move the + * interrupt away, which flushes the xive, followed by the + * target CPU doing a H_CPPR. So any new interrupt coming into + * the queue must still be routed to us and isn't a source + * of concern. + */ + GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc); + } /* Apply new CPPR */ xc->hw_cppr = cppr; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 288fe4f0db4e..e1bcdc32a851 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/security_features.h> #include <asm/firmware.h> struct fixup_entry { @@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +void do_stf_entry_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_entry_barrier_fixup), + end = PTRRELOC(&__stop___stf_entry_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } else if (types & STF_BARRIER_SYNC_ORI) { + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types & STF_BARRIER_FALLBACK) + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + BRANCH_SET_LINK); + else + patch_instruction(dest + 1, instrs[1]); + + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + +void do_stf_exit_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[6], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_exit_barrier_fixup), + end = PTRRELOC(&__stop___stf_exit_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x60000000; /* nop */ + instrs[4] = 0x60000000; /* nop */ + instrs[5] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ + instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + } else { + instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ + instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + } + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ + } else { + instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ + } + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + patch_instruction(dest + 3, instrs[3]); + patch_instruction(dest + 4, instrs[4]); + patch_instruction(dest + 5, instrs[5]); + } + printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + + +void do_stf_barrier_fixups(enum stf_barrier_type types) +{ + do_stf_entry_barrier_fixups(types); + do_stf_exit_barrier_fixups(types); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 67d3125d0610..84b58abc08ee 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -222,6 +222,7 @@ config PTE_64BIT config PHYS_64BIT bool 'Large physical address support' if E500 || PPC_86xx depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx + select PHYS_ADDR_T_64BIT ---help--- This option enables kernel support for larger than 32-bit physical addresses. This feature may not be available on all cores. diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index ccc421503363..c9ef3c532169 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1095,18 +1095,6 @@ static int show_spu_loadavg(struct seq_file *s, void *private) atomic_read(&nr_spu_contexts), idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; -} - -static int spu_loadavg_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_spu_loadavg, NULL); -} - -static const struct file_operations spu_loadavg_fops = { - .open = spu_loadavg_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, }; int __init spu_sched_init(void) @@ -1135,7 +1123,7 @@ int __init spu_sched_init(void) mod_timer(&spuloadavg_timer, 0); - entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); + entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg); if (!entry) goto out_stop_kthread; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ef8c9ce53a61..a6648ec99ca7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); pnv_setup_rfi_flush(); + setup_stf_barrier(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b55ad4286dc7..fdb32e056ef4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_rfi_flush(); + setup_stf_barrier(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index cd4fd85fde84..274bc064c41f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -3,8 +3,16 @@ # see Documentation/kbuild/kconfig-language.txt. # +config 64BIT + bool + +config 32BIT + bool + config RISCV def_bool y + # even on 32-bit, physical (and DMA) addresses are > 32-bits + select PHYS_ADDR_T_64BIT select OF select OF_EARLY_FLATTREE select OF_IRQ @@ -22,7 +30,6 @@ config RISCV select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_GENERIC_DMA_COHERENT select IRQ_DOMAIN @@ -39,16 +46,9 @@ config RISCV config MMU def_bool y -# even on 32-bit, physical (and DMA) addresses are > 32-bits -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - config ZONE_DMA32 bool - default y - -config ARCH_DMA_ADDR_T_64BIT - def_bool y + default y if 64BIT config PAGE_OFFSET hex @@ -101,7 +101,6 @@ choice config ARCH_RV32I bool "RV32I" - select CPU_SUPPORTS_32BIT_KERNEL select 32BIT select GENERIC_ASHLDI3 select GENERIC_ASHRDI3 @@ -109,13 +108,13 @@ config ARCH_RV32I config ARCH_RV64I bool "RV64I" - select CPU_SUPPORTS_64BIT_KERNEL select 64BIT select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select SWIOTLB endchoice @@ -171,11 +170,6 @@ config NR_CPUS depends on SMP default "8" -config CPU_SUPPORTS_32BIT_KERNEL - bool -config CPU_SUPPORTS_64BIT_KERNEL - bool - choice prompt "CPU Tuning" default TUNE_GENERIC @@ -202,24 +196,6 @@ endmenu menu "Kernel type" -choice - prompt "Kernel code model" - default 64BIT - -config 32BIT - bool "32-bit kernel" - depends on CPU_SUPPORTS_32BIT_KERNEL - help - Select this option to build a 32-bit kernel. - -config 64BIT - bool "64-bit kernel" - depends on CPU_SUPPORTS_64BIT_KERNEL - help - Select this option to build a 64-bit kernel. - -endchoice - source "mm/Kconfig" source "kernel/Kconfig.preempt" diff --git a/arch/riscv/include/asm/dma-mapping.h b/arch/riscv/include/asm/dma-mapping.h new file mode 100644 index 000000000000..8facc1c8fa05 --- /dev/null +++ b/arch/riscv/include/asm/dma-mapping.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _RISCV_ASM_DMA_MAPPING_H +#define _RISCV_ASM_DMA_MAPPING_H 1 + +#ifdef CONFIG_SWIOTLB +#include <linux/swiotlb.h> +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +{ + return &swiotlb_dma_ops; +} +#else +#include <asm-generic/dma-mapping.h> +#endif /* CONFIG_SWIOTLB */ + +#endif /* _RISCV_ASM_DMA_MAPPING_H */ diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h index 0f2fc9ef20fc..b3638c505728 100644 --- a/arch/riscv/include/asm/pci.h +++ b/arch/riscv/include/asm/pci.h @@ -26,9 +26,6 @@ /* RISC-V shim does not initialize PCI bus */ #define pcibios_assign_all_busses() 1 -/* We do not have an IOMMU */ -#define PCI_DMA_BUS_IS_PHYS 1 - extern int isa_dma_bridge_buggy; #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c11f40c1b2a8..ee44a48faf79 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -29,6 +29,7 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/sched/task.h> +#include <linux/swiotlb.h> #include <asm/setup.h> #include <asm/sections.h> @@ -206,6 +207,7 @@ void __init setup_arch(char **cmdline_p) setup_bootmem(); paging_init(); unflatten_device_tree(); + swiotlb_init(1); #ifdef CONFIG_SMP setup_smp(); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 199ac3e4da1d..6a64287ec1da 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -35,9 +35,6 @@ config GENERIC_BUG config GENERIC_BUG_RELATIVE_POINTERS def_bool y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - config GENERIC_LOCKBREAK def_bool y if SMP && PREEMPT @@ -133,7 +130,6 @@ config S390 select HAVE_CMPXCHG_LOCAL select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select DMA_DIRECT_OPS select HAVE_DYNAMIC_FTRACE @@ -709,7 +705,11 @@ config QDIO menuconfig PCI bool "PCI support" select PCI_MSI + select IOMMU_HELPER select IOMMU_SUPPORT + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH + help Enable PCI support. @@ -733,15 +733,6 @@ config PCI_DOMAINS config HAS_IOMEM def_bool PCI -config IOMMU_HELPER - def_bool PCI - -config NEED_SG_DMA_LENGTH - def_bool PCI - -config NEED_DMA_MAP_STATE - def_bool PCI - config CHSC_SCH def_tristate m prompt "Support for CHSC subchannels" diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 12fe3591034f..94f8db468c9b 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -2,8 +2,6 @@ #ifndef __ASM_S390_PCI_H #define __ASM_S390_PCI_H -/* must be set before including asm-generic/pci.h */ -#define PCI_DMA_BUS_IS_PHYS (0) /* must be set before including pci_clp.h */ #define PCI_BAR_COUNT 6 diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index fc7e04c2195b..54f5496913fa 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -294,21 +294,9 @@ static int sysinfo_show(struct seq_file *m, void *v) return 0; } -static int sysinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, sysinfo_show, NULL); -} - -static const struct file_operations sysinfo_fops = { - .open = sysinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init sysinfo_create_proc(void) { - proc_create("sysinfo", 0444, NULL, &sysinfo_fops); + proc_create_single("sysinfo", 0444, NULL, sysinfo_show); return 0; } device_initcall(sysinfo_create_proc); @@ -386,18 +374,6 @@ static const struct seq_operations service_level_seq_ops = { .show = service_level_show }; -static int service_level_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &service_level_seq_ops); -} - -static const struct file_operations service_level_ops = { - .open = service_level_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static void service_level_vm_print(struct seq_file *m, struct service_level *slr) { @@ -420,7 +396,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { - proc_create("service_levels", 0, NULL, &service_level_ops); + proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); if (MACHINE_IS_VM) register_service_level(&service_level_vm); return 0; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 8961e3970901..969882b54266 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) gpa = READ_ONCE(scb_o->itdba) & ~0xffUL; if (gpa && (scb_s->ecb & ECB_TE)) { - if (!(gpa & ~0x1fffU)) { + if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 2d15d84c20ed..d387a0fbdd7e 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -668,15 +668,6 @@ void zpci_dma_exit(void) kmem_cache_destroy(dma_region_table_cache); } -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_debug_do_init); - const struct dma_map_ops s390_pci_dma_ops = { .alloc = s390_dma_alloc, .free = s390_dma_free, @@ -685,8 +676,6 @@ const struct dma_map_ops s390_pci_dma_ops = { .map_page = s390_dma_map_pages, .unmap_page = s390_dma_unmap_pages, .mapping_error = s390_mapping_error, - /* if we support direct DMA this must be conditional */ - .is_phys = 0, /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_pci_dma_ops); diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index e9525bc1b4a6..1ace023cbdce 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -21,7 +21,7 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1851eaeee131..a97538b607a4 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -14,7 +14,6 @@ config SUPERH select HAVE_OPROFILE select HAVE_GENERIC_DMA_COHERENT select HAVE_ARCH_TRACEHOOK - select HAVE_DMA_API_DEBUG select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_CUSTOM_GPIO_H @@ -51,6 +50,9 @@ config SUPERH select HAVE_ARCH_AUDITSYSCALL select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_NMI + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH + help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast @@ -161,12 +163,6 @@ config DMA_COHERENT config DMA_NONCOHERENT def_bool !DMA_COHERENT -config NEED_DMA_MAP_STATE - def_bool DMA_NONCOHERENT - -config NEED_SG_DMA_LENGTH - def_bool y - config PGTABLE_LEVELS default 3 if X2TLB default 2 diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c index c0eec08d8f95..b05be597b19f 100644 --- a/arch/sh/drivers/dma/dma-api.c +++ b/arch/sh/drivers/dma/dma-api.c @@ -339,18 +339,6 @@ static int dma_proc_show(struct seq_file *m, void *v) return 0; } -static int dma_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, dma_proc_show, NULL); -} - -static const struct file_operations dma_proc_fops = { - .open = dma_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - int register_dmac(struct dma_info *info) { unsigned int total_channels, i; @@ -423,7 +411,7 @@ EXPORT_SYMBOL(unregister_dmac); static int __init dma_api_init(void) { printk(KERN_NOTICE "DMA: Registering DMA API.\n"); - return proc_create("dma", 0, NULL, &dma_proc_fops) ? 0 : -ENOMEM; + return proc_create_single("dma", 0, NULL, dma_proc_show) ? 0 : -ENOMEM; } subsys_initcall(dma_api_init); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 0033f0df2b3b..10a36b1cf2ea 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -71,12 +71,6 @@ extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM; * SuperH has everything mapped statically like x86. */ -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - #ifdef CONFIG_PCI /* * None of the SH PCI controllers support MWI, it is always treated as a diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c index 178457d7620c..3e3a32fc676e 100644 --- a/arch/sh/kernel/dma-nommu.c +++ b/arch/sh/kernel/dma-nommu.c @@ -78,7 +78,6 @@ const struct dma_map_ops nommu_dma_ops = { .sync_single_for_device = nommu_sync_single_for_device, .sync_sg_for_device = nommu_sync_sg_for_device, #endif - .is_phys = 1, }; void __init no_iommu_init(void) diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index f1b44697ad68..fceb2adfcac7 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -20,18 +20,9 @@ #include <asm/cacheflush.h> #include <asm/addrspace.h> -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_init); - void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8767e45f1b2b..435dbc033afe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,7 +25,6 @@ config SPARC select RTC_CLASS select RTC_DRV_M48T59 select RTC_SYSTOHC - select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION @@ -44,6 +43,8 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select LOCKDEP_SMALL if LOCKDEP + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH config SPARC32 def_bool !64BIT @@ -67,6 +68,7 @@ config SPARC64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_KMEMLEAK + select IOMMU_HELPER select SPARSE_IRQ select RTC_DRV_CMOS select RTC_DRV_BQ4802 @@ -102,14 +104,6 @@ config ARCH_ATU bool default y if SPARC64 -config ARCH_DMA_ADDR_T_64BIT - bool - default y if ARCH_ATU - -config IOMMU_HELPER - bool - default y if SPARC64 - config STACKTRACE_SUPPORT bool default y if SPARC64 @@ -146,12 +140,6 @@ config ZONE_DMA bool default y if SPARC32 -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA bool default y if SPARC32 diff --git a/include/linux/iommu-common.h b/arch/sparc/include/asm/iommu-common.h index 802c90c79d1f..802c90c79d1f 100644 --- a/include/linux/iommu-common.h +++ b/arch/sparc/include/asm/iommu-common.h diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index 9ed6b54caa4b..0ef6dedf747e 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -17,7 +17,7 @@ #define IOPTE_WRITE 0x0000000000000002UL #define IOMMU_NUM_CTXS 4096 -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> struct iommu_arena { unsigned long *map; diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h index 98917e48727d..cfc0ee9476c6 100644 --- a/arch/sparc/include/asm/pci_32.h +++ b/arch/sparc/include/asm/pci_32.h @@ -17,10 +17,6 @@ #define PCI_IRQ_NONE 0xffffffff -/* Dynamic DMA mapping stuff. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - #endif /* __KERNEL__ */ #ifndef CONFIG_LEON_PCI diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index 671274e36cfa..fac77813402c 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -17,12 +17,6 @@ #define PCI_IRQ_NONE 0xffffffff -/* The PCI address space does not equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - /* PCI IOMMU mapping bypass support. */ /* PCI 64-bit addressing works for all slots on all controller diff --git a/arch/sparc/include/uapi/asm/jsflash.h b/arch/sparc/include/uapi/asm/jsflash.h deleted file mode 100644 index 68c98a54281a..000000000000 --- a/arch/sparc/include/uapi/asm/jsflash.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * jsflash.h: OS Flash SIMM support for JavaStations. - * - * Copyright (C) 1999 Pete Zaitcev - */ - -#ifndef _SPARC_JSFLASH_H -#define _SPARC_JSFLASH_H - -#ifndef _SPARC_TYPES_H -#include <linux/types.h> -#endif - -/* - * Semantics of the offset is a full address. - * Hardcode it or get it from probe ioctl. - * - * We use full bus address, so that we would be - * automatically compatible with possible future systems. - */ - -#define JSFLASH_IDENT (('F'<<8)|54) -struct jsflash_ident_arg { - __u64 off; /* 0x20000000 is included */ - __u32 size; - char name[32]; /* With trailing zero */ -}; - -#define JSFLASH_ERASE (('F'<<8)|55) -/* Put 0 as argument, may be flags or sector number... */ - -#define JSFLASH_PROGRAM (('F'<<8)|56) -struct jsflash_program_arg { - __u64 data; /* char* for sparc and sparc64 */ - __u64 off; - __u32 size; -}; - -#endif /* _SPARC_JSFLASH_H */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 76cb57750dda..cf8640841b7a 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_SPARC32) += leon_pmc.o obj-$(CONFIG_SPARC64) += reboot.o obj-$(CONFIG_SPARC64) += sysfs.o -obj-$(CONFIG_SPARC64) += iommu.o +obj-$(CONFIG_SPARC64) += iommu.o iommu-common.o obj-$(CONFIG_SPARC64) += central.o obj-$(CONFIG_SPARC64) += starfire.o obj-$(CONFIG_SPARC64) += power.o @@ -74,8 +74,6 @@ obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o obj-$(CONFIG_SPARC64_SMP) += cpumap.o -obj-y += dma.o - obj-$(CONFIG_PCIC_PCI) += pcic.o obj-$(CONFIG_LEON_PCI) += leon_pci.o obj-$(CONFIG_SPARC_GRPCI2)+= leon_pci_grpci2.o diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c deleted file mode 100644 index f73e7597c971..000000000000 --- a/arch/sparc/kernel/dma.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/dma-mapping.h> -#include <linux/dma-debug.h> - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 15) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_init); diff --git a/lib/iommu-common.c b/arch/sparc/kernel/iommu-common.c index 55b00de106b5..59cb16691322 100644 --- a/lib/iommu-common.c +++ b/arch/sparc/kernel/iommu-common.c @@ -8,9 +8,9 @@ #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/iommu-helper.h> -#include <linux/iommu-common.h> #include <linux/dma-mapping.h> #include <linux/hash.h> +#include <asm/iommu-common.h> static unsigned long iommu_large_alloc = 15; @@ -93,7 +93,6 @@ void iommu_tbl_pool_init(struct iommu_map_table *iommu, p->hint = p->start; p->end = num_entries; } -EXPORT_SYMBOL(iommu_tbl_pool_init); unsigned long iommu_tbl_range_alloc(struct device *dev, struct iommu_map_table *iommu, @@ -224,7 +223,6 @@ bail: return n; } -EXPORT_SYMBOL(iommu_tbl_range_alloc); static struct iommu_pool *get_pool(struct iommu_map_table *tbl, unsigned long entry) @@ -264,4 +262,3 @@ void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, bitmap_clear(iommu->map, entry, npages); spin_unlock_irqrestore(&(pool->lock), flags); } -EXPORT_SYMBOL(iommu_tbl_range_free); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index b08dc3416f06..40d008b0bd3e 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -14,7 +14,7 @@ #include <linux/errno.h> #include <linux/iommu-helper.h> #include <linux/bitmap.h> -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> #ifdef CONFIG_PCI #include <linux/pci.h> diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 3bcef9ce74df..cca9134cfa7d 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -678,25 +678,14 @@ static int sparc_io_proc_show(struct seq_file *m, void *v) return 0; } - -static int sparc_io_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, sparc_io_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations sparc_io_proc_fops = { - .owner = THIS_MODULE, - .open = sparc_io_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ static void register_proc_sparc_ioport(void) { #ifdef CONFIG_PROC_FS - proc_create_data("io_map", 0, NULL, &sparc_io_proc_fops, &sparc_iomap); - proc_create_data("dvma_map", 0, NULL, &sparc_io_proc_fops, &_sparc_dvma); + proc_create_single_data("io_map", 0, NULL, sparc_io_proc_show, + &sparc_iomap); + proc_create_single_data("dvma_map", 0, NULL, sparc_io_proc_show, + &_sparc_dvma); #endif } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 86b625f9d8dc..c0fa3ef6cf01 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -16,7 +16,7 @@ #include <linux/list.h> #include <linux/init.h> #include <linux/bitmap.h> -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> #include <asm/hypervisor.h> #include <asm/iommu.h> diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 249367228c33..565d9ac883d0 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -16,7 +16,7 @@ #include <linux/export.h> #include <linux/log2.h> #include <linux/of_device.h> -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> #include <asm/iommu.h> #include <asm/irq.h> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index d4e8c497ae86..dcf5ea28a281 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -208,19 +208,6 @@ static int fake_ide_media_proc_show(struct seq_file *m, void *v) return 0; } -static int fake_ide_media_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, fake_ide_media_proc_show, NULL); -} - -static const struct file_operations fake_ide_media_proc_fops = { - .owner = THIS_MODULE, - .open = fake_ide_media_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void make_ide_entries(const char *dev_name) { struct proc_dir_entry *dir, *ent; @@ -231,7 +218,8 @@ static void make_ide_entries(const char *dev_name) dir = proc_mkdir(dev_name, proc_ide); if(!dir) return; - ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops); + ent = proc_create_single("media", S_IRUGO, dir, + fake_ide_media_proc_show); if(!ent) return; snprintf(name, sizeof(name), "ide0/%s", dev_name); proc_symlink(dev_name, proc_ide_root, name); diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 462e59a7ae78..03f991e44288 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -19,6 +19,8 @@ config UNICORE32 select ARCH_WANT_FRAME_POINTERS select GENERIC_IOMAP select MODULES_USE_ELF_REL + select NEED_DMA_MAP_STATE + select SWIOTLB help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip @@ -61,9 +63,6 @@ config ARCH_MAY_HAVE_PC_FDC config ZONE_DMA def_bool y -config NEED_DMA_MAP_STATE - def_bool y - source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/unicore32/mm/Kconfig b/arch/unicore32/mm/Kconfig index e9154a59d561..82759b6aba67 100644 --- a/arch/unicore32/mm/Kconfig +++ b/arch/unicore32/mm/Kconfig @@ -39,14 +39,3 @@ config CPU_TLB_SINGLE_ENTRY_DISABLE default y help Say Y here to disable the TLB single entry operations. - -config SWIOTLB - def_bool y - select DMA_DIRECT_OPS - -config IOMMU_HELPER - def_bool SWIOTLB - -config NEED_SG_DMA_LENGTH - def_bool SWIOTLB - diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..f2ee6a8ffe65 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,6 +28,8 @@ config X86_64 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA + select NEED_DMA_MAP_STATE + select SWIOTLB select X86_DEV_DMA_OPS select ARCH_HAS_SYSCALL_WRAPPER @@ -134,7 +136,6 @@ config X86 select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS @@ -184,6 +185,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select IRQ_FORCED_THREADING + select NEED_SG_DMA_LENGTH select PCI_LOCKLESS_CONFIG select PERF_EVENTS select RTC_LIB @@ -236,13 +238,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX config SBUS bool -config NEED_DMA_MAP_STATE - def_bool y - depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB - -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA def_bool y depends on ISA_DMA_API @@ -875,6 +870,7 @@ config DMI config GART_IOMMU bool "Old AMD GART IOMMU support" + select IOMMU_HELPER select SWIOTLB depends on X86_64 && PCI && AMD_NB ---help--- @@ -896,6 +892,7 @@ config GART_IOMMU config CALGARY_IOMMU bool "IBM Calgary IOMMU support" + select IOMMU_HELPER select SWIOTLB depends on X86_64 && PCI ---help--- @@ -923,20 +920,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. -# need this always selected by IOMMU for the VIA workaround -config SWIOTLB - def_bool y if X86_64 - ---help--- - Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU. Using this PCI devices - which can only access 32-bits of memory can be used on systems - with more than 3 GB of memory. - If unsure, say Y. - -config IOMMU_HELPER - def_bool y - depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU - config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL @@ -1458,6 +1441,7 @@ config HIGHMEM config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G + select PHYS_ADDR_T_64BIT select SWIOTLB ---help--- PAE is required for NX support, and furthermore enables @@ -1485,14 +1469,6 @@ config X86_5LEVEL Say N if unsure. -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - depends on X86_64 || X86_PAE - -config ARCH_DMA_ADDR_T_64BIT - def_bool y - depends on X86_64 || HIGHMEM64G - config X86_DIRECT_GBPAGES def_bool y depends on X86_64 && !DEBUG_PAGEALLOC diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 578793e97431..fb00a2fca990 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ @@ -207,13 +206,19 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ - +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -274,9 +279,10 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -334,6 +340,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) @@ -363,5 +370,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 89ce4bfd241f..ce4d176b3d13 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -30,10 +30,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -int arch_dma_supported(struct device *dev, u64 mask); -#define arch_dma_supported arch_dma_supported - -bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); +bool arch_dma_alloc_attrs(struct device **dev); #define arch_dma_alloc_attrs arch_dma_alloc_attrs #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c25775fad4ed..f4b2588865e9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -924,7 +924,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); struct kvm *(*vm_alloc)(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 53d5b1b9255e..fda2114197b3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,6 +42,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -68,6 +70,11 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_SSB_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e @@ -340,6 +347,8 @@ #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..8b38df98548e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,6 +217,14 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, +}; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; @@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" ((u32)val), + "d" ((u32)(val >> 32)), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. @@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_base; \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d32175e30259..662963681ea6 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -117,9 +117,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif - -#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - #endif /* __KERNEL__ */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..ae7c2c5cd7f0 --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include <linux/thread_info.h> +#include <asm/nospec-branch.h> + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. + */ +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_ssbd_mask; + +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; +} + +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} + +#endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a5d9521bb2cb..2ff2a30a264f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,6 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -105,6 +106,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -144,7 +146,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index dfcbe6924eaf..cadeafabf167 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1715,19 +1715,6 @@ static int proc_apm_show(struct seq_file *m, void *v) return 0; } -static int proc_apm_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_apm_show, NULL); -} - -static const struct file_operations apm_file_ops = { - .owner = THIS_MODULE, - .open = proc_apm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int apm(void *unused) { unsigned short bx; @@ -2360,7 +2347,7 @@ static int __init apm_init(void) set_desc_base(&gdt[APM_DS >> 3], (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); - proc_create("apm", 0, NULL, &apm_file_ops); + proc_create_single("apm", 0, NULL, proc_apm_show); kapmd_task = kthread_create(apm, NULL, "kapmd"); if (IS_ERR(kapmd_task)) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 12bc0a1139da..1b18be3f35a8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -10,6 +10,7 @@ #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> +#include <asm/spec-ctrl.h> #include <asm/smp.h> #include <asm/pci-direct.h> #include <asm/delay.h> @@ -554,6 +555,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_FAM10H_NODE_ID, value); nodes_per_socket = ((value >> 3) & 7) + 1; } + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable SSBD. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); + setup_force_cpu_cap(X86_FEATURE_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; + } + } } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) @@ -791,6 +812,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + set_cpu_cap(c, X86_FEATURE_ZEN); /* * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bfca937bdcc3..7416fc206b4a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,8 +12,10 @@ #include <linux/utsname.h> #include <linux/cpu.h> #include <linux/module.h> +#include <linux/nospec.h> +#include <linux/prctl.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/cmdline.h> #include <asm/bugs.h> #include <asm/processor.h> @@ -27,6 +29,27 @@ #include <asm/intel-family.h> static void __init spectre_v2_select_mitigation(void); +static void __init ssb_select_mitigation(void); + +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +u64 __ro_after_init x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; + +/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). + */ +u64 __ro_after_init x86_amd_ls_cfg_base; +u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; void __init check_bugs(void) { @@ -37,9 +60,27 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + /* Allow STIBP in MSR_SPEC_CTRL if supported */ + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability. + */ + ssb_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = { #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + +void +x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +{ + u64 msrval, guestval, hostval = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); + + /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* + * Restrict guest_spec_ctrl to supported values. Clear the + * modifiable bits in the host base value and or the + * modifiable bits from the guest value. + */ + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); + } + } + + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. + */ + if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !static_cpu_has(X86_FEATURE_VIRT_SSBD)) + return; + + /* + * If the host has SSBD mitigation enabled, force it in the host's + * virtual MSR value. If its not permanently enabled, evaluate + * current's TIF_SSBD thread flag. + */ + if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) + hostval = SPEC_CTRL_SSBD; + else + hostval = ssbd_tif_to_spec_ctrl(ti->flags); + + /* Sanitize the guest value */ + guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; + + if (hostval != guestval) { + unsigned long tif; + + tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + + speculative_store_bypass_update(tif); + } +} +EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); + +static void x86_amd_ssb_disable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; + + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); + else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -312,32 +422,289 @@ retpoline_auto: } #undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, + SPEC_STORE_BYPASS_CMD_SECCOMP, +}; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", + [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ +}; + +static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + return SPEC_STORE_BYPASS_CMD_NONE; + } else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +static enum ssb_mitigation __init __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + + if (!boot_cpu_has(X86_FEATURE_SSBD)) + return mode; + + cmd = ssb_parse_cmdline(); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + case SPEC_STORE_BYPASS_CMD_SECCOMP: + /* + * Choose prctl+seccomp as the default mode if seccomp is + * enabled. + */ + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPEC_STORE_BYPASS_SECCOMP; + else + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode == SPEC_STORE_BYPASS_DISABLE) { + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + break; + case X86_VENDOR_AMD: + x86_amd_ssb_disable(); + break; + } + } + + return mode; +} + +static void ssb_select_mitigation(void) +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Speculation prctl: " fmt + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + bool update; + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) + return -ENXIO; + + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; + } + + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ + if (task == current && update) + speculative_store_bypass_update_current(); + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_SECCOMP +void arch_seccomp_spec_mitigate(struct task_struct *task) +{ + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); +} +#endif + +static int ssb_prctl_get(struct task_struct *task) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_SECCOMP: + case SPEC_STORE_BYPASS_PRCTL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + +void x86_spec_ctrl_setup_ap(void) +{ + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +} #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) + +static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); - if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + + switch (bug) { + case X86_BUG_CPU_MELTDOWN: + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + + break; + + case X86_BUG_SPECTRE_V1: + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + + case X86_BUG_SPEC_STORE_BYPASS: + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + + default: + break; + } + return sprintf(buf, "Vulnerable\n"); } +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); +} + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); +} - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ce243f7d2d4e..38276f58d3bf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -757,17 +757,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c) * and they also have a different bit for STIBP support. Also, * a hypervisor might have set the individual AMD bits even on * Intel CPUs, for finer-grained selection of what's available. - * - * We use the AMD bits in 0x8000_0008 EBX as the generic hardware - * features, which are visible in /proc/cpuinfo and used by the - * kernel. So set those accordingly from the Intel bits. */ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { + set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } } void get_cpu_cap(struct cpuinfo_x86 *c) @@ -927,21 +942,47 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; -static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +/* Only list CPUs which speculate but are non susceptible to SSB */ +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, + {} +}; + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_meltdown)) - return false; + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + if (!x86_match_cpu(cpu_no_spec_store_bypass) && + !(ia32_cap & ARCH_CAP_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_meltdown)) + return; + /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) - return false; + return; - return true; + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } /* @@ -992,12 +1033,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (!x86_match_cpu(cpu_no_speculation)) { - if (cpu_vulnerable_to_meltdown(c)) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - } + cpu_set_bug_bits(c); fpu__init_system(c); @@ -1359,6 +1395,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #endif mtrr_ap_init(); validate_apic_and_package_id(c); + x86_spec_ctrl_setup_ap(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e806b11a99af..37672d299e35 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -50,4 +50,6 @@ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +extern void x86_spec_ctrl_setup_ap(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 60d1897041da..577e7f7ae273 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SSBD); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 77625b60a510..ab5d9dd668d2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -15,13 +15,11 @@ #include <asm/x86_init.h> #include <asm/iommu_table.h> -static int forbid_dac __read_mostly; +static bool disable_dac_quirk __read_mostly; const struct dma_map_ops *dma_ops = &dma_direct_ops; EXPORT_SYMBOL(dma_ops); -static int iommu_sac_force __read_mostly; - #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -55,9 +53,6 @@ struct device x86_dma_fallback_dev = { }; EXPORT_SYMBOL(x86_dma_fallback_dev); -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES 65536 - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -76,7 +71,7 @@ void __init pci_iommu_alloc(void) } } -bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) +bool arch_dma_alloc_attrs(struct device **dev) { if (!*dev) *dev = &x86_dma_fallback_dev; @@ -125,13 +120,13 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; + pr_warn("forcesac option ignored.\n"); if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; + pr_warn("allowdac option ignored.\n"); if (!strncmp(p, "nodac", 5)) - forbid_dac = 1; + pr_warn("nodac option ignored.\n"); if (!strncmp(p, "usedac", 6)) { - forbid_dac = -1; + disable_dac_quirk = true; return 1; } #ifdef CONFIG_SWIOTLB @@ -156,40 +151,9 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -int arch_dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - dev_info(dev, "PCI: Disallowing DAC for device\n"); - return 0; - } -#endif - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { - dev_info(dev, "Force SAC with mask %Lx\n", mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(arch_dma_supported); - static int __init pci_iommu_init(void) { struct iommu_table_entry *p; - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); @@ -209,11 +173,17 @@ rootfs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ +static int via_no_dac_cb(struct pci_dev *pdev, void *data) +{ + pdev->dev.dma_32bit_limit = true; + return 0; +} + static void via_no_dac(struct pci_dev *dev) { - if (forbid_dac == 0) { + if (!disable_dac_quirk) { dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); - forbid_dac = 1; + pci_walk_bus(dev->subordinate, via_no_dac_cb, NULL); } } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 03408b942adb..30ca2d1a9231 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -38,6 +38,7 @@ #include <asm/switch_to.h> #include <asm/desc.h> #include <asm/prctl.h> +#include <asm/spec-ctrl.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -278,6 +279,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +#ifdef CONFIG_SMP + +struct ssb_state { + struct ssb_state *shared_state; + raw_spinlock_t lock; + unsigned int disable_state; + unsigned long local_state; +}; + +#define LSTATE_SSB 0 + +static DEFINE_PER_CPU(struct ssb_state, ssb_state); + +void speculative_store_bypass_ht_init(void) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + st->local_state = 0; + + /* + * Shared state setup happens once on the first bringup + * of the CPU. It's not destroyed on CPU hotunplug. + */ + if (st->shared_state) + return; + + raw_spin_lock_init(&st->lock); + + /* + * Go over HT siblings and check whether one of them has set up the + * shared state pointer already. + */ + for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (!per_cpu(ssb_state, cpu).shared_state) + continue; + + /* Link it to the state of the sibling: */ + st->shared_state = per_cpu(ssb_state, cpu).shared_state; + return; + } + + /* + * First HT sibling to come up on the core. Link shared state of + * the first HT sibling to itself. The siblings on the same core + * which come up later will see the shared state pointer and link + * themself to the state of this CPU. + */ + st->shared_state = st; +} + +/* + * Logic is: First HT sibling enables SSBD for both siblings in the core + * and last sibling to disable it, disables it for the whole core. This how + * MSR_SPEC_CTRL works in "hardware": + * + * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + */ +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + u64 msr = x86_amd_ls_cfg_base; + + if (!static_cpu_has(X86_FEATURE_ZEN)) { + msr |= ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + return; + } + + if (tifn & _TIF_SSBD) { + /* + * Since this can race with prctl(), block reentry on the + * same CPU. + */ + if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) + return; + + msr |= x86_amd_ls_cfg_ssbd_mask; + + raw_spin_lock(&st->shared_state->lock); + /* First sibling enables SSBD: */ + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + st->shared_state->disable_state++; + raw_spin_unlock(&st->shared_state->lock); + } else { + if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) + return; + + raw_spin_lock(&st->shared_state->lock); + st->shared_state->disable_state--; + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + raw_spin_unlock(&st->shared_state->lock); + } +} +#else +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + + wrmsrl(MSR_AMD64_LS_CFG, msr); +} +#endif + +static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) +{ + /* + * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, + * so ssbd_tif_to_spec_ctrl() just works. + */ + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); +} + +static __always_inline void intel_set_ssb_state(unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + + wrmsrl(MSR_IA32_SPEC_CTRL, msr); +} + +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) + amd_set_ssb_virt_state(tifn); + else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); +} + +void speculative_store_bypass_update(unsigned long tif) +{ + preempt_disable(); + __speculative_store_bypass_update(tif); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -309,6 +452,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); + + if ((tifp ^ tifn) & _TIF_SSBD) + __speculative_store_bypass_update(tifn); } /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0f1cbb042f49..9dd324ae4832 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -79,6 +79,7 @@ #include <asm/qspinlock.h> #include <asm/intel-family.h> #include <asm/cpu_device_id.h> +#include <asm/spec-ctrl.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -244,6 +245,8 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + speculative_store_bypass_ht_init(); + /* * Lock vector_lock, set CPU online and bring the vector * allocator online. Online must be set with vector_lock held @@ -1292,6 +1295,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_mtrr_aps_delayed_init(); smp_quirk_init_udelay(); + + speculative_store_bypass_ht_init(); } void arch_enable_nonboot_cpus_begin(void) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 82055b90a8b3..92bf2f2e7cdd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -408,7 +408,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(ARCH_CAPABILITIES); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; @@ -647,13 +652,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5708e951a5c6..46ff64da44ca 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1260,14 +1260,18 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) } } -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { - struct kvm_run *run = vcpu->run; - - kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); + kvm_hv_hypercall_set_result(vcpu, result); + ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); +} + static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) { struct eventfd_ctx *eventfd; @@ -1350,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) /* Hypercall continuation is not supported yet */ if (rep_cnt || rep_idx) { ret = HV_STATUS_INVALID_HYPERCALL_CODE; - goto set_result; + goto out; } switch (code) { @@ -1381,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) break; } -set_result: - kvm_hv_hypercall_set_result(vcpu, ret); - return 1; +out: + return kvm_hv_hypercall_complete(vcpu, ret); } void kvm_hv_init_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b74c9c1405b9..3773c4625114 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic) static void advance_periodic_target_expiration(struct kvm_lapic *apic) { - apic->lapic_timer.tscdeadline += - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); + ktime_t now = ktime_get(); + u64 tscl = rdtsc(); + ktime_t delta; + + /* + * Synchronize both deadlines to the same time source or + * differences in the periods (caused by differences in the + * underlying clocks or numerical approximation errors) will + * cause the two to drift apart over time as the errors + * accumulate. + */ apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.target_expiration, apic->lapic_timer.period); + delta = ktime_sub(apic->lapic_timer.target_expiration, now); + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + nsec_to_cycles(apic->vcpu, delta); } static void start_sw_period(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1fc05e428aba..26110c202b19 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,7 +49,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/virtext.h> #include "trace.h" @@ -213,6 +213,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -4108,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_F15H_IC_CFG: { int family, model; @@ -4203,7 +4217,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ @@ -4230,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) return 1; if (data & ~PRED_CMD_IBPB) @@ -4244,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5557,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5652,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5670,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); @@ -5786,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -7012,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3f1696570b41..40aa29204baf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,7 +51,7 @@ #include <asm/apic.h> #include <asm/irq_remapping.h> #include <asm/mmu_context.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/mshyperv.h> #include "trace.h" @@ -3529,7 +3529,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -3648,12 +3647,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; @@ -3679,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -9488,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -9722,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; @@ -9871,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -12632,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vm_init = vmx_vm_init, .vm_alloc = vmx_vm_alloc, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59371de5d722..71e7cda6d014 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1061,6 +1061,7 @@ static u32 emulated_msrs[] = { MSR_SMI_COUNT, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2906,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); @@ -4606,14 +4607,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; @@ -6676,11 +6671,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit; - if (kvm_hv_hypercall_enabled(vcpu->kvm)) { - if (!kvm_hv_hypercall(vcpu)) - return 0; - goto out; - } + if (kvm_hv_hypercall_enabled(vcpu->kvm)) + return kvm_hv_hypercall(vcpu); nr = kvm_register_read(vcpu, VCPU_REGS_RAX); a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); @@ -6701,7 +6693,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (kvm_x86_ops->get_cpl(vcpu) != 0) { ret = -KVM_EPERM; - goto out_error; + goto out; } switch (nr) { @@ -6721,12 +6713,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ret = -KVM_ENOSYS; break; } -out_error: +out: if (!op_64_bit) ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); -out: ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } @@ -7985,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; + int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -8023,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; + cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & + (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + if (cpuid_update_needed) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index c921e8bccdc8..17df332269b2 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -19,7 +19,6 @@ config XTENSA select HAVE_ARCH_KASAN if MMU select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h index d5a82153a7c5..6ddf0a30c60d 100644 --- a/arch/xtensa/include/asm/pci.h +++ b/arch/xtensa/include/asm/pci.h @@ -42,8 +42,6 @@ extern struct pci_controller* pcibios_alloc_controller(void); * decisions. */ -#define PCI_DMA_BUS_IS_PHYS (1) - /* Tell PCI code what kind of PCI resource mappings we support */ #define HAVE_PCI_MMAP 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 732631ce250f..392b4a80ebc2 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -261,12 +261,3 @@ const struct dma_map_ops xtensa_dma_map_ops = { .mapping_error = xtensa_dma_mapping_error, }; EXPORT_SYMBOL(xtensa_dma_map_ops); - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init xtensa_dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(xtensa_dma_init); diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 92f567f9a21e..af81a62faba6 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -153,19 +153,6 @@ static int rs_proc_show(struct seq_file *m, void *v) return 0; } -static int rs_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rs_proc_show, NULL); -} - -static const struct file_operations rs_proc_fops = { - .owner = THIS_MODULE, - .open = rs_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, @@ -176,7 +163,7 @@ static const struct tty_operations serial_ops = { .chars_in_buffer = rs_chars_in_buffer, .hangup = rs_hangup, .wait_until_sent = rs_wait_until_sent, - .proc_fops = &rs_proc_fops, + .proc_show = rs_proc_show, }; int __init rs_init(void) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index d819dc77fe65..a9e8633388f4 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -55,13 +55,13 @@ BFQG_FLAG_FNS(empty) /* This should be called with the scheduler lock held. */ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) { - unsigned long long now; + u64 now; if (!bfqg_stats_waiting(stats)) return; - now = sched_clock(); - if (time_after64(now, stats->start_group_wait_time)) + now = ktime_get_ns(); + if (now > stats->start_group_wait_time) blkg_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); @@ -77,20 +77,20 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, return; if (bfqg == curr_bfqg) return; - stats->start_group_wait_time = sched_clock(); + stats->start_group_wait_time = ktime_get_ns(); bfqg_stats_mark_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { - unsigned long long now; + u64 now; if (!bfqg_stats_empty(stats)) return; - now = sched_clock(); - if (time_after64(now, stats->start_empty_time)) + now = ktime_get_ns(); + if (now > stats->start_empty_time) blkg_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); @@ -116,7 +116,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) if (bfqg_stats_empty(stats)) return; - stats->start_empty_time = sched_clock(); + stats->start_empty_time = ktime_get_ns(); bfqg_stats_mark_empty(stats); } @@ -125,9 +125,9 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg) struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_idling(stats)) { - unsigned long long now = sched_clock(); + u64 now = ktime_get_ns(); - if (time_after64(now, stats->start_idle_time)) + if (now > stats->start_idle_time) blkg_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); @@ -138,7 +138,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; - stats->start_idle_time = sched_clock(); + stats->start_idle_time = ktime_get_ns(); bfqg_stats_mark_idling(stats); } @@ -171,18 +171,18 @@ void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) blkg_rwstat_add(&bfqg->stats.merged, op, 1); } -void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, - uint64_t io_start_time, unsigned int op) +void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, + u64 io_start_time_ns, unsigned int op) { struct bfqg_stats *stats = &bfqg->stats; - unsigned long long now = sched_clock(); + u64 now = ktime_get_ns(); - if (time_after64(now, io_start_time)) + if (now > io_start_time_ns) blkg_rwstat_add(&stats->service_time, op, - now - io_start_time); - if (time_after64(io_start_time, start_time)) + now - io_start_time_ns); + if (io_start_time_ns > start_time_ns) blkg_rwstat_add(&stats->wait_time, op, - io_start_time - start_time); + io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ @@ -191,8 +191,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, unsigned int op) { } void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { } -void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, - uint64_t io_start_time, unsigned int op) { } +void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, + u64 io_start_time_ns, unsigned int op) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 771ae9730ac6..495b9ddb3355 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -49,9 +49,39 @@ * * In particular, to provide these low-latency guarantees, BFQ * explicitly privileges the I/O of two classes of time-sensitive - * applications: interactive and soft real-time. This feature enables - * BFQ to provide applications in these classes with a very low - * latency. Finally, BFQ also features additional heuristics for + * applications: interactive and soft real-time. In more detail, BFQ + * behaves this way if the low_latency parameter is set (default + * configuration). This feature enables BFQ to provide applications in + * these classes with a very low latency. + * + * To implement this feature, BFQ constantly tries to detect whether + * the I/O requests in a bfq_queue come from an interactive or a soft + * real-time application. For brevity, in these cases, the queue is + * said to be interactive or soft real-time. In both cases, BFQ + * privileges the service of the queue, over that of non-interactive + * and non-soft-real-time queues. This privileging is performed, + * mainly, by raising the weight of the queue. So, for brevity, we + * call just weight-raising periods the time periods during which a + * queue is privileged, because deemed interactive or soft real-time. + * + * The detection of soft real-time queues/applications is described in + * detail in the comments on the function + * bfq_bfqq_softrt_next_start. On the other hand, the detection of an + * interactive queue works as follows: a queue is deemed interactive + * if it is constantly non empty only for a limited time interval, + * after which it does become empty. The queue may be deemed + * interactive again (for a limited time), if it restarts being + * constantly non empty, provided that this happens only after the + * queue has remained empty for a given minimum idle time. + * + * By default, BFQ computes automatically the above maximum time + * interval, i.e., the time interval after which a constantly + * non-empty queue stops being deemed interactive. Since a queue is + * weight-raised while it is deemed interactive, this maximum time + * interval happens to coincide with the (maximum) duration of the + * weight-raising for interactive queues. + * + * Finally, BFQ also features additional heuristics for * preserving both a low latency and a high throughput on NCQ-capable, * rotational or flash-based devices, and to get the job done quickly * for applications consisting in many I/O-bound processes. @@ -61,14 +91,14 @@ * all low-latency heuristics for that device, by setting low_latency * to 0. * - * BFQ is described in [1], where also a reference to the initial, more - * theoretical paper on BFQ can be found. The interested reader can find - * in the latter paper full details on the main algorithm, as well as - * formulas of the guarantees and formal proofs of all the properties. - * With respect to the version of BFQ presented in these papers, this - * implementation adds a few more heuristics, such as the one that - * guarantees a low latency to soft real-time applications, and a - * hierarchical extension based on H-WF2Q+. + * BFQ is described in [1], where also a reference to the initial, + * more theoretical paper on BFQ can be found. The interested reader + * can find in the latter paper full details on the main algorithm, as + * well as formulas of the guarantees and formal proofs of all the + * properties. With respect to the version of BFQ presented in these + * papers, this implementation adds a few more heuristics, such as the + * ones that guarantee a low latency to interactive and soft real-time + * applications, and a hierarchical extension based on H-WF2Q+. * * B-WF2Q+ is based on WF2Q+, which is described in [2], together with * H-WF2Q+, while the augmented tree used here to implement B-WF2Q+ @@ -218,56 +248,46 @@ static struct kmem_cache *bfq_pool; #define BFQ_RATE_SHIFT 16 /* - * By default, BFQ computes the duration of the weight raising for - * interactive applications automatically, using the following formula: - * duration = (R / r) * T, where r is the peak rate of the device, and - * R and T are two reference parameters. - * In particular, R is the peak rate of the reference device (see - * below), and T is a reference time: given the systems that are - * likely to be installed on the reference device according to its - * speed class, T is about the maximum time needed, under BFQ and - * while reading two files in parallel, to load typical large - * applications on these systems (see the comments on - * max_service_from_wr below, for more details on how T is obtained). - * In practice, the slower/faster the device at hand is, the more/less - * it takes to load applications with respect to the reference device. - * Accordingly, the longer/shorter BFQ grants weight raising to - * interactive applications. - * - * BFQ uses four different reference pairs (R, T), depending on: - * . whether the device is rotational or non-rotational; - * . whether the device is slow, such as old or portable HDDs, as well as - * SD cards, or fast, such as newer HDDs and SSDs. + * When configured for computing the duration of the weight-raising + * for interactive queues automatically (see the comments at the + * beginning of this file), BFQ does it using the following formula: + * duration = (ref_rate / r) * ref_wr_duration, + * where r is the peak rate of the device, and ref_rate and + * ref_wr_duration are two reference parameters. In particular, + * ref_rate is the peak rate of the reference storage device (see + * below), and ref_wr_duration is about the maximum time needed, with + * BFQ and while reading two files in parallel, to load typical large + * applications on the reference device (see the comments on + * max_service_from_wr below, for more details on how ref_wr_duration + * is obtained). In practice, the slower/faster the device at hand + * is, the more/less it takes to load applications with respect to the + * reference device. Accordingly, the longer/shorter BFQ grants + * weight raising to interactive applications. * - * The device's speed class is dynamically (re)detected in - * bfq_update_peak_rate() every time the estimated peak rate is updated. + * BFQ uses two different reference pairs (ref_rate, ref_wr_duration), + * depending on whether the device is rotational or non-rotational. * - * In the following definitions, R_slow[0]/R_fast[0] and - * T_slow[0]/T_fast[0] are the reference values for a slow/fast - * rotational device, whereas R_slow[1]/R_fast[1] and - * T_slow[1]/T_fast[1] are the reference values for a slow/fast - * non-rotational device. Finally, device_speed_thresh are the - * thresholds used to switch between speed classes. The reference - * rates are not the actual peak rates of the devices used as a - * reference, but slightly lower values. The reason for using these - * slightly lower values is that the peak-rate estimator tends to - * yield slightly lower values than the actual peak rate (it can yield - * the actual peak rate only if there is only one process doing I/O, - * and the process does sequential I/O). + * In the following definitions, ref_rate[0] and ref_wr_duration[0] + * are the reference values for a rotational device, whereas + * ref_rate[1] and ref_wr_duration[1] are the reference values for a + * non-rotational device. The reference rates are not the actual peak + * rates of the devices used as a reference, but slightly lower + * values. The reason for using slightly lower values is that the + * peak-rate estimator tends to yield slightly lower values than the + * actual peak rate (it can yield the actual peak rate only if there + * is only one process doing I/O, and the process does sequential + * I/O). * - * Both the reference peak rates and the thresholds are measured in - * sectors/usec, left-shifted by BFQ_RATE_SHIFT. + * The reference peak rates are measured in sectors/usec, left-shifted + * by BFQ_RATE_SHIFT. */ -static int R_slow[2] = {1000, 10700}; -static int R_fast[2] = {14000, 33000}; +static int ref_rate[2] = {14000, 33000}; /* - * To improve readability, a conversion function is used to initialize the - * following arrays, which entails that they can be initialized only in a - * function. + * To improve readability, a conversion function is used to initialize + * the following array, which entails that the array can be + * initialized only in a function. */ -static int T_slow[2]; -static int T_fast[2]; -static int device_speed_thresh[2]; +static int ref_wr_duration[2]; /* * BFQ uses the above-detailed, time-based weight-raising mechanism to @@ -487,46 +507,6 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, } /* - * See the comments on bfq_limit_depth for the purpose of - * the depths set in the function. - */ -static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) -{ - bfqd->sb_shift = bt->sb.shift; - - /* - * In-word depths if no bfq_queue is being weight-raised: - * leaving 25% of tags only for sync reads. - * - * In next formulas, right-shift the value - * (1U<<bfqd->sb_shift), instead of computing directly - * (1U<<(bfqd->sb_shift - something)), to be robust against - * any possible value of bfqd->sb_shift, without having to - * limit 'something'. - */ - /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U); - /* - * no more than 75% of tags for sync writes (25% extra tags - * w.r.t. async I/O, to prevent async I/O from starving sync - * writes) - */ - bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U); - - /* - * In-word depths in case some bfq_queue is being weight- - * raised: leaving ~63% of tags for sync reads. This is the - * highest percentage for which, in our tests, application - * start-up times didn't suffer from any regression due to tag - * shortage. - */ - /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U); - /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U); -} - -/* * Async I/O can easily starve sync I/O (both sync reads and sync * writes), by consuming all tags. Similarly, storms of sync writes, * such as those that sync(2) may trigger, can starve sync reads. @@ -535,25 +515,11 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) */ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) { - struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct bfq_data *bfqd = data->q->elevator->elevator_data; - struct sbitmap_queue *bt; if (op_is_sync(op) && !op_is_write(op)) return; - if (data->flags & BLK_MQ_REQ_RESERVED) { - if (unlikely(!tags->nr_reserved_tags)) { - WARN_ON_ONCE(1); - return; - } - bt = &tags->breserved_tags; - } else - bt = &tags->bitmap_tags; - - if (unlikely(bfqd->sb_shift != bt->sb.shift)) - bfq_update_depths(bfqd, bt); - data->shallow_depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; @@ -906,26 +872,30 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) if (bfqd->bfq_wr_max_time > 0) return bfqd->bfq_wr_max_time; - dur = bfqd->RT_prod; + dur = bfqd->rate_dur_prod; do_div(dur, bfqd->peak_rate); /* - * Limit duration between 3 and 13 seconds. Tests show that - * higher values than 13 seconds often yield the opposite of - * the desired result, i.e., worsen responsiveness by letting - * non-interactive and non-soft-real-time applications - * preserve weight raising for a too long time interval. + * Limit duration between 3 and 25 seconds. The upper limit + * has been conservatively set after the following worst case: + * on a QEMU/KVM virtual machine + * - running in a slow PC + * - with a virtual disk stacked on a slow low-end 5400rpm HDD + * - serving a heavy I/O workload, such as the sequential reading + * of several files + * mplayer took 23 seconds to start, if constantly weight-raised. + * + * As for higher values than that accomodating the above bad + * scenario, tests show that higher values would often yield + * the opposite of the desired result, i.e., would worsen + * responsiveness by allowing non-interactive applications to + * preserve weight raising for too long. * * On the other end, lower values than 3 seconds make it * difficult for most interactive tasks to complete their jobs * before weight-raising finishes. */ - if (dur > msecs_to_jiffies(13000)) - dur = msecs_to_jiffies(13000); - else if (dur < msecs_to_jiffies(3000)) - dur = msecs_to_jiffies(3000); - - return dur; + return clamp_val(dur, msecs_to_jiffies(3000), msecs_to_jiffies(25000)); } /* switch back from soft real-time to interactive weight raising */ @@ -1393,15 +1363,6 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, } /* - * Return the farthest future time instant according to jiffies - * macros. - */ -static unsigned long bfq_greatest_from_now(void) -{ - return jiffies + MAX_JIFFY_OFFSET; -} - -/* * Return the farthest past time instant according to jiffies * macros. */ @@ -1545,7 +1506,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, in_burst = bfq_bfqq_in_large_burst(bfqq); soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && !in_burst && - time_is_before_jiffies(bfqq->soft_rt_next_start); + time_is_before_jiffies(bfqq->soft_rt_next_start) && + bfqq->dispatched == 0; *interactive = !in_burst && idle_for_long_time; wr_or_deserves_wr = bfqd->low_latency && (bfqq->wr_coeff > 1 || @@ -1858,6 +1820,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, return ELEVATOR_NO_MERGE; } +static struct bfq_queue *bfq_init_rq(struct request *rq); + static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { @@ -1866,7 +1830,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(req) < blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { - struct bfq_queue *bfqq = RQ_BFQQ(req); + struct bfq_queue *bfqq = bfq_init_rq(req); struct bfq_data *bfqd = bfqq->bfqd; struct request *prev, *next_rq; @@ -1891,14 +1855,25 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, } } +/* + * This function is called to notify the scheduler that the requests + * rq and 'next' have been merged, with 'next' going away. BFQ + * exploits this hook to address the following issue: if 'next' has a + * fifo_time lower that rq, then the fifo_time of rq must be set to + * the value of 'next', to not forget the greater age of 'next'. + * + * NOTE: in this function we assume that rq is in a bfq_queue, basing + * on that rq is picked from the hash table q->elevator->hash, which, + * in its turn, is filled only with I/O requests present in + * bfq_queues, while BFQ is in use for the request queue q. In fact, + * the function that fills this hash table (elv_rqhash_add) is called + * only by bfq_insert_request. + */ static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct request *next) { - struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next); - - if (!RB_EMPTY_NODE(&rq->rb_node)) - goto end; - spin_lock_irq(&bfqq->bfqd->lock); + struct bfq_queue *bfqq = bfq_init_rq(rq), + *next_bfqq = bfq_init_rq(next); /* * If next and rq belong to the same bfq_queue and next is older @@ -1920,11 +1895,6 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, if (bfqq->next_rq == next) bfqq->next_rq = rq; - bfq_remove_request(q, next); - bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags); - - spin_unlock_irq(&bfqq->bfqd->lock); -end: bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); } @@ -2506,37 +2476,15 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd) /* * Update parameters related to throughput and responsiveness, as a * function of the estimated peak rate. See comments on - * bfq_calc_max_budget(), and on T_slow and T_fast arrays. + * bfq_calc_max_budget(), and on the ref_wr_duration array. */ static void update_thr_responsiveness_params(struct bfq_data *bfqd) { - int dev_type = blk_queue_nonrot(bfqd->queue); - - if (bfqd->bfq_user_max_budget == 0) + if (bfqd->bfq_user_max_budget == 0) { bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd); - - if (bfqd->device_speed == BFQ_BFQD_FAST && - bfqd->peak_rate < device_speed_thresh[dev_type]) { - bfqd->device_speed = BFQ_BFQD_SLOW; - bfqd->RT_prod = R_slow[dev_type] * - T_slow[dev_type]; - } else if (bfqd->device_speed == BFQ_BFQD_SLOW && - bfqd->peak_rate > device_speed_thresh[dev_type]) { - bfqd->device_speed = BFQ_BFQD_FAST; - bfqd->RT_prod = R_fast[dev_type] * - T_fast[dev_type]; + bfq_log(bfqd, "new max_budget = %d", bfqd->bfq_max_budget); } - - bfq_log(bfqd, -"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec", - dev_type == 0 ? "ROT" : "NONROT", - bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW", - bfqd->device_speed == BFQ_BFQD_FAST ? - (USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT : - (USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT, - (USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>> - BFQ_RATE_SHIFT); } static void bfq_reset_rate_computation(struct bfq_data *bfqd, @@ -3266,23 +3214,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bfq_bfqq_softrt_next_start(bfqd, bfqq); else { /* - * The application is still waiting for the - * completion of one or more requests: - * prevent it from possibly being incorrectly - * deemed as soft real-time by setting its - * soft_rt_next_start to infinity. In fact, - * without this assignment, the application - * would be incorrectly deemed as soft - * real-time if: - * 1) it issued a new request before the - * completion of all its in-flight - * requests, and - * 2) at that time, its soft_rt_next_start - * happened to be in the past. - */ - bfqq->soft_rt_next_start = - bfq_greatest_from_now(); - /* * Schedule an update of soft_rt_next_start to when * the task may be discovered to be isochronous. */ @@ -4540,14 +4471,12 @@ static inline void bfq_update_insert_stats(struct request_queue *q, unsigned int cmd_flags) {} #endif -static void bfq_prepare_request(struct request *rq, struct bio *bio); - static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_queue *bfqq; bool idle_timer_disabled = false; unsigned int cmd_flags; @@ -4562,24 +4491,13 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_mq_sched_request_inserted(rq); spin_lock_irq(&bfqd->lock); + bfqq = bfq_init_rq(rq); if (at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else list_add_tail(&rq->queuelist, &bfqd->dispatch); - } else { - if (WARN_ON_ONCE(!bfqq)) { - /* - * This should never happen. Most likely rq is - * a requeued regular request, being - * re-inserted without being first - * re-prepared. Do a prepare, to avoid - * failure. - */ - bfq_prepare_request(rq, rq->bio); - bfqq = RQ_BFQQ(rq); - } - + } else { /* bfqq is assumed to be non null here */ idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred @@ -4778,8 +4696,8 @@ static void bfq_finish_requeue_request(struct request *rq) if (rq->rq_flags & RQF_STARTED) bfqg_stats_update_completion(bfqq_group(bfqq), - rq_start_time_ns(rq), - rq_io_start_time_ns(rq), + rq->start_time_ns, + rq->io_start_time_ns, rq->cmd_flags); if (likely(rq->rq_flags & RQF_STARTED)) { @@ -4922,11 +4840,48 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, } /* - * Allocate bfq data structures associated with this request. + * Only reset private fields. The actual request preparation will be + * performed by bfq_init_rq, when rq is either inserted or merged. See + * comments on bfq_init_rq for the reason behind this delayed + * preparation. */ static void bfq_prepare_request(struct request *rq, struct bio *bio) { + /* + * Regardless of whether we have an icq attached, we have to + * clear the scheduler pointers, as they might point to + * previously allocated bic/bfqq structs. + */ + rq->elv.priv[0] = rq->elv.priv[1] = NULL; +} + +/* + * If needed, init rq, allocate bfq data structures associated with + * rq, and increment reference counters in the destination bfq_queue + * for rq. Return the destination bfq_queue for rq, or NULL is rq is + * not associated with any bfq_queue. + * + * This function is invoked by the functions that perform rq insertion + * or merging. One may have expected the above preparation operations + * to be performed in bfq_prepare_request, and not delayed to when rq + * is inserted or merged. The rationale behind this delayed + * preparation is that, after the prepare_request hook is invoked for + * rq, rq may still be transformed into a request with no icq, i.e., a + * request not associated with any queue. No bfq hook is invoked to + * signal this tranformation. As a consequence, should these + * preparation operations be performed when the prepare_request hook + * is invoked, and should rq be transformed one moment later, bfq + * would end up in an inconsistent state, because it would have + * incremented some queue counters for an rq destined to + * transformation, without any chance to correctly lower these + * counters back. In contrast, no transformation can still happen for + * rq after rq has been inserted or merged. So, it is safe to execute + * these preparation operations when rq is finally inserted or merged. + */ +static struct bfq_queue *bfq_init_rq(struct request *rq) +{ struct request_queue *q = rq->q; + struct bio *bio = rq->bio; struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_io_cq *bic; const int is_sync = rq_is_sync(rq); @@ -4934,20 +4889,21 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) bool new_queue = false; bool bfqq_already_existing = false, split = false; + if (unlikely(!rq->elv.icq)) + return NULL; + /* - * Even if we don't have an icq attached, we should still clear - * the scheduler pointers, as they might point to previously - * allocated bic/bfqq structs. + * Assuming that elv.priv[1] is set only if everything is set + * for this rq. This holds true, because this function is + * invoked only for insertion or merging, and, after such + * events, a request cannot be manipulated any longer before + * being removed from bfq. */ - if (!rq->elv.icq) { - rq->elv.priv[0] = rq->elv.priv[1] = NULL; - return; - } + if (rq->elv.priv[1]) + return rq->elv.priv[1]; bic = icq_to_bic(rq->elv.icq); - spin_lock_irq(&bfqd->lock); - bfq_check_ioprio_change(bic, bio); bfq_bic_update_cgroup(bic, bio); @@ -5006,7 +4962,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) if (unlikely(bfq_bfqq_just_created(bfqq))) bfq_handle_burst(bfqd, bfqq); - spin_unlock_irq(&bfqd->lock); + return bfqq; } static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) @@ -5105,6 +5061,64 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); } +/* + * See the comments on bfq_limit_depth for the purpose of + * the depths set in the function. Return minimum shallow depth we'll use. + */ +static unsigned int bfq_update_depths(struct bfq_data *bfqd, + struct sbitmap_queue *bt) +{ + unsigned int i, j, min_shallow = UINT_MAX; + + /* + * In-word depths if no bfq_queue is being weight-raised: + * leaving 25% of tags only for sync reads. + * + * In next formulas, right-shift the value + * (1U<<bt->sb.shift), instead of computing directly + * (1U<<(bt->sb.shift - something)), to be robust against + * any possible value of bt->sb.shift, without having to + * limit 'something'. + */ + /* no more than 50% of tags for async I/O */ + bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); + /* + * no more than 75% of tags for sync writes (25% extra tags + * w.r.t. async I/O, to prevent async I/O from starving sync + * writes) + */ + bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); + + /* + * In-word depths in case some bfq_queue is being weight- + * raised: leaving ~63% of tags for sync reads. This is the + * highest percentage for which, in our tests, application + * start-up times didn't suffer from any regression due to tag + * shortage. + */ + /* no more than ~18% of tags for async I/O */ + bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); + /* no more than ~37% of tags for sync writes (~20% extra tags) */ + bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); + + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + min_shallow = min(min_shallow, bfqd->word_depths[i][j]); + + return min_shallow; +} + +static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +{ + struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + unsigned int min_shallow; + + min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); + sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); + return 0; +} + static void bfq_exit_queue(struct elevator_queue *e) { struct bfq_data *bfqd = e->elevator_data; @@ -5242,14 +5256,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->wr_busy_queues = 0; /* - * Begin by assuming, optimistically, that the device is a - * high-speed one, and that its peak rate is equal to 2/3 of - * the highest reference rate. + * Begin by assuming, optimistically, that the device peak + * rate is equal to 2/3 of the highest reference rate. */ - bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * - T_fast[blk_queue_nonrot(bfqd->queue)]; - bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3; - bfqd->device_speed = BFQ_BFQD_FAST; + bfqd->rate_dur_prod = ref_rate[blk_queue_nonrot(bfqd->queue)] * + ref_wr_duration[blk_queue_nonrot(bfqd->queue)]; + bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3; spin_lock_init(&bfqd->lock); @@ -5526,6 +5538,7 @@ static struct elevator_type iosched_bfq_mq = { .requests_merged = bfq_requests_merged, .request_merged = bfq_request_merged, .has_work = bfq_has_work, + .init_hctx = bfq_init_hctx, .init_sched = bfq_init_queue, .exit_sched = bfq_exit_queue, }, @@ -5556,8 +5569,8 @@ static int __init bfq_init(void) /* * Times to load large popular applications for the typical * systems installed on the reference devices (see the - * comments before the definitions of the next two - * arrays). Actually, we use slightly slower values, as the + * comments before the definition of the next + * array). Actually, we use slightly lower values, as the * estimated peak rate tends to be smaller than the actual * peak rate. The reason for this last fact is that estimates * are computed over much shorter time intervals than the long @@ -5566,25 +5579,8 @@ static int __init bfq_init(void) * scheduler cannot rely on a peak-rate-evaluation workload to * be run for a long time. */ - T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */ - T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */ - T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */ - T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */ - - /* - * Thresholds that determine the switch between speed classes - * (see the comments before the definition of the array - * device_speed_thresh). These thresholds are biased towards - * transitions to the fast class. This is safer than the - * opposite bias. In fact, a wrong transition to the slow - * class results in short weight-raising periods, because the - * speed of the device then tends to be higher that the - * reference peak rate. On the opposite end, a wrong - * transition to the fast class tends to increase - * weight-raising periods, because of the opposite reason. - */ - device_speed_thresh[0] = (4 * R_slow[0]) / 3; - device_speed_thresh[1] = (4 * R_slow[1]) / 3; + ref_wr_duration[0] = msecs_to_jiffies(7000); /* actually 8 sec */ + ref_wr_duration[1] = msecs_to_jiffies(2500); /* actually 3 sec */ ret = elv_register(&iosched_bfq_mq); if (ret) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ae2f3dadec44..0f712e03b035 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -399,11 +399,6 @@ struct bfq_io_cq { struct bfq_ttime saved_ttime; }; -enum bfq_device_speed { - BFQ_BFQD_FAST, - BFQ_BFQD_SLOW, -}; - /** * struct bfq_data - per-device data structure. * @@ -611,12 +606,11 @@ struct bfq_data { /* Max service-rate for a soft real-time queue, in sectors/sec */ unsigned int bfq_wr_max_softrt_rate; /* - * Cached value of the product R*T, used for computing the - * maximum duration of weight raising automatically. + * Cached value of the product ref_rate*ref_wr_duration, used + * for computing the maximum duration of weight raising + * automatically. */ - u64 RT_prod; - /* device-speed class for the low-latency heuristic */ - enum bfq_device_speed device_speed; + u64 rate_dur_prod; /* fallback dummy bfqq for extreme OOM conditions */ struct bfq_queue oom_bfqq; @@ -636,12 +630,6 @@ struct bfq_data { struct bfq_queue *bio_bfqq; /* - * Cached sbitmap shift, used to compute depth limits in - * bfq_update_depths. - */ - unsigned int sb_shift; - - /* * Depth limits used in bfq_limit_depth (see comments on the * function) */ @@ -732,9 +720,9 @@ struct bfqg_stats { /* total time with empty current active q with other requests queued */ struct blkg_stat empty_time; /* fields after this shouldn't be cleared on stat reset */ - uint64_t start_group_wait_time; - uint64_t start_idle_time; - uint64_t start_empty_time; + u64 start_group_wait_time; + u64 start_idle_time; + u64 start_empty_time; uint16_t flags; #endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ }; @@ -856,8 +844,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, unsigned int op); void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op); void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op); -void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, - uint64_t io_start_time, unsigned int op); +void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, + u64 io_start_time_ns, unsigned int op); void bfqg_stats_update_dequeue(struct bfq_group *bfqg); void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); void bfqg_stats_update_idle_time(struct bfq_group *bfqg); diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9cfdd6c83b5b..add7c7c85335 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -56,12 +56,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, struct bio_set *bs = bio->bi_pool; unsigned inline_vecs; - if (!bs || !bs->bio_integrity_pool) { + if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) { bip = kmalloc(sizeof(struct bio_integrity_payload) + sizeof(struct bio_vec) * nr_vecs, gfp_mask); inline_vecs = nr_vecs; } else { - bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); + bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask); inline_vecs = BIP_INLINE_VECS; } @@ -74,7 +74,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned long idx = 0; bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, - bs->bvec_integrity_pool); + &bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; bip->bip_max_vcnt = bvec_nr_vecs(idx); @@ -90,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, return bip; err: - mempool_free(bip, bs->bio_integrity_pool); + mempool_free(bip, &bs->bio_integrity_pool); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(bio_integrity_alloc); @@ -111,10 +111,10 @@ static void bio_integrity_free(struct bio *bio) kfree(page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset); - if (bs && bs->bio_integrity_pool) { - bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); + if (bs && mempool_initialized(&bs->bio_integrity_pool)) { + bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); - mempool_free(bip, bs->bio_integrity_pool); + mempool_free(bip, &bs->bio_integrity_pool); } else { kfree(bip); } @@ -465,16 +465,15 @@ EXPORT_SYMBOL(bio_integrity_clone); int bioset_integrity_create(struct bio_set *bs, int pool_size) { - if (bs->bio_integrity_pool) + if (mempool_initialized(&bs->bio_integrity_pool)) return 0; - bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); - if (!bs->bio_integrity_pool) + if (mempool_init_slab_pool(&bs->bio_integrity_pool, + pool_size, bip_slab)) return -1; - bs->bvec_integrity_pool = biovec_create_pool(pool_size); - if (!bs->bvec_integrity_pool) { - mempool_destroy(bs->bio_integrity_pool); + if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) { + mempool_exit(&bs->bio_integrity_pool); return -1; } @@ -484,8 +483,8 @@ EXPORT_SYMBOL(bioset_integrity_create); void bioset_integrity_free(struct bio_set *bs) { - mempool_destroy(bs->bio_integrity_pool); - mempool_destroy(bs->bvec_integrity_pool); + mempool_exit(&bs->bio_integrity_pool); + mempool_exit(&bs->bvec_integrity_pool); } EXPORT_SYMBOL(bioset_integrity_free); diff --git a/block/bio.c b/block/bio.c index 53e0f0a1ed94..595663e0281a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -53,7 +53,7 @@ static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { * fs_bio_set is the bio_set containing bio and iovec memory pools used by * IO code that does not need private memory pools. */ -struct bio_set *fs_bio_set; +struct bio_set fs_bio_set; EXPORT_SYMBOL(fs_bio_set); /* @@ -254,7 +254,7 @@ static void bio_free(struct bio *bio) bio_uninit(bio); if (bs) { - bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); + bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -262,7 +262,7 @@ static void bio_free(struct bio *bio) p = bio; p -= bs->front_pad; - mempool_free(p, bs->bio_pool); + mempool_free(p, &bs->bio_pool); } else { /* Bio was allocated by bio_kmalloc() */ kfree(bio); @@ -454,7 +454,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, inline_vecs = nr_iovecs; } else { /* should not use nobvec bioset for nr_iovecs > 0 */ - if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0)) + if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && + nr_iovecs > 0)) return NULL; /* * generic_make_request() converts recursion to iteration; this @@ -483,11 +484,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, bs->rescue_workqueue) gfp_mask &= ~__GFP_DIRECT_RECLAIM; - p = mempool_alloc(bs->bio_pool, gfp_mask); + p = mempool_alloc(&bs->bio_pool, gfp_mask); if (!p && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - p = mempool_alloc(bs->bio_pool, gfp_mask); + p = mempool_alloc(&bs->bio_pool, gfp_mask); } front_pad = bs->front_pad; @@ -503,11 +504,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, if (nr_iovecs > inline_vecs) { unsigned long idx = 0; - bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool); } if (unlikely(!bvl)) @@ -524,25 +525,25 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, return bio; err_free: - mempool_free(p, bs->bio_pool); + mempool_free(p, &bs->bio_pool); return NULL; } EXPORT_SYMBOL(bio_alloc_bioset); -void zero_fill_bio(struct bio *bio) +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start) { unsigned long flags; struct bio_vec bv; struct bvec_iter iter; - bio_for_each_segment(bv, bio, iter) { + __bio_for_each_segment(bv, bio, iter, start) { char *data = bvec_kmap_irq(&bv, &flags); memset(data, 0, bv.bv_len); flush_dcache_page(bv.bv_page); bvec_kunmap_irq(data, &flags); } } -EXPORT_SYMBOL(zero_fill_bio); +EXPORT_SYMBOL(zero_fill_bio_iter); /** * bio_put - release a reference to a bio @@ -970,27 +971,68 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); +void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter) +{ + struct bio_vec src_bv, dst_bv; + void *src_p, *dst_p; + unsigned bytes; + + while (src_iter->bi_size && dst_iter->bi_size) { + src_bv = bio_iter_iovec(src, *src_iter); + dst_bv = bio_iter_iovec(dst, *dst_iter); + + bytes = min(src_bv.bv_len, dst_bv.bv_len); + + src_p = kmap_atomic(src_bv.bv_page); + dst_p = kmap_atomic(dst_bv.bv_page); + + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, + bytes); + + kunmap_atomic(dst_p); + kunmap_atomic(src_p); + + flush_dcache_page(dst_bv.bv_page); + + bio_advance_iter(src, src_iter, bytes); + bio_advance_iter(dst, dst_iter, bytes); + } +} +EXPORT_SYMBOL(bio_copy_data_iter); + /** - * bio_copy_data - copy contents of data buffers from one chain of bios to - * another - * @src: source bio list - * @dst: destination bio list - * - * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats - * @src and @dst as linked lists of bios. + * bio_copy_data - copy contents of data buffers from one bio to another + * @src: source bio + * @dst: destination bio * * Stops when it reaches the end of either @src or @dst - that is, copies * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). */ void bio_copy_data(struct bio *dst, struct bio *src) { - struct bvec_iter src_iter, dst_iter; - struct bio_vec src_bv, dst_bv; - void *src_p, *dst_p; - unsigned bytes; + struct bvec_iter src_iter = src->bi_iter; + struct bvec_iter dst_iter = dst->bi_iter; - src_iter = src->bi_iter; - dst_iter = dst->bi_iter; + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); +} +EXPORT_SYMBOL(bio_copy_data); + +/** + * bio_list_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * Stops when it reaches the end of either the @src list or @dst list - that is, + * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of + * bios). + */ +void bio_list_copy_data(struct bio *dst, struct bio *src) +{ + struct bvec_iter src_iter = src->bi_iter; + struct bvec_iter dst_iter = dst->bi_iter; while (1) { if (!src_iter.bi_size) { @@ -1009,26 +1051,10 @@ void bio_copy_data(struct bio *dst, struct bio *src) dst_iter = dst->bi_iter; } - src_bv = bio_iter_iovec(src, src_iter); - dst_bv = bio_iter_iovec(dst, dst_iter); - - bytes = min(src_bv.bv_len, dst_bv.bv_len); - - src_p = kmap_atomic(src_bv.bv_page); - dst_p = kmap_atomic(dst_bv.bv_page); - - memcpy(dst_p + dst_bv.bv_offset, - src_p + src_bv.bv_offset, - bytes); - - kunmap_atomic(dst_p); - kunmap_atomic(src_p); - - bio_advance_iter(src, &src_iter, bytes); - bio_advance_iter(dst, &dst_iter, bytes); + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); } } -EXPORT_SYMBOL(bio_copy_data); +EXPORT_SYMBOL(bio_list_copy_data); struct bio_map_data { int is_our_pages; @@ -1584,6 +1610,7 @@ void bio_set_pages_dirty(struct bio *bio) set_page_dirty_lock(page); } } +EXPORT_SYMBOL_GPL(bio_set_pages_dirty); static void bio_release_pages(struct bio *bio) { @@ -1667,6 +1694,7 @@ void bio_check_pages_dirty(struct bio *bio) bio_put(bio); } } +EXPORT_SYMBOL_GPL(bio_check_pages_dirty); void generic_start_io_acct(struct request_queue *q, int rw, unsigned long sectors, struct hd_struct *part) @@ -1749,6 +1777,9 @@ again: if (!bio_integrity_endio(bio)) return; + if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL")) + bio->bi_next = NULL; + /* * Need to have a real endio function for chained bios, otherwise * various corner cases will break (like stacking block devices that @@ -1848,30 +1879,38 @@ EXPORT_SYMBOL_GPL(bio_trim); * create memory pools for biovec's in a bio_set. * use the global biovec slabs created for general use. */ -mempool_t *biovec_create_pool(int pool_entries) +int biovec_init_pool(mempool_t *pool, int pool_entries) { struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX; - return mempool_create_slab_pool(pool_entries, bp->slab); + return mempool_init_slab_pool(pool, pool_entries, bp->slab); } -void bioset_free(struct bio_set *bs) +/* + * bioset_exit - exit a bioset initialized with bioset_init() + * + * May be called on a zeroed but uninitialized bioset (i.e. allocated with + * kzalloc()). + */ +void bioset_exit(struct bio_set *bs) { if (bs->rescue_workqueue) destroy_workqueue(bs->rescue_workqueue); + bs->rescue_workqueue = NULL; - mempool_destroy(bs->bio_pool); - mempool_destroy(bs->bvec_pool); + mempool_exit(&bs->bio_pool); + mempool_exit(&bs->bvec_pool); bioset_integrity_free(bs); - bio_put_slab(bs); - - kfree(bs); + if (bs->bio_slab) + bio_put_slab(bs); + bs->bio_slab = NULL; } -EXPORT_SYMBOL(bioset_free); +EXPORT_SYMBOL(bioset_exit); /** - * bioset_create - Create a bio_set + * bioset_init - Initialize a bio_set + * @bs: pool to initialize * @pool_size: Number of bio and bio_vecs to cache in the mempool * @front_pad: Number of bytes to allocate in front of the returned bio * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS @@ -1890,16 +1929,12 @@ EXPORT_SYMBOL(bioset_free); * dispatch queued requests when the mempool runs out of space. * */ -struct bio_set *bioset_create(unsigned int pool_size, - unsigned int front_pad, - int flags) +int bioset_init(struct bio_set *bs, + unsigned int pool_size, + unsigned int front_pad, + int flags) { unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); - struct bio_set *bs; - - bs = kzalloc(sizeof(*bs), GFP_KERNEL); - if (!bs) - return NULL; bs->front_pad = front_pad; @@ -1908,34 +1943,29 @@ struct bio_set *bioset_create(unsigned int pool_size, INIT_WORK(&bs->rescue_work, bio_alloc_rescue); bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); - if (!bs->bio_slab) { - kfree(bs); - return NULL; - } + if (!bs->bio_slab) + return -ENOMEM; - bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab); - if (!bs->bio_pool) + if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab)) goto bad; - if (flags & BIOSET_NEED_BVECS) { - bs->bvec_pool = biovec_create_pool(pool_size); - if (!bs->bvec_pool) - goto bad; - } + if ((flags & BIOSET_NEED_BVECS) && + biovec_init_pool(&bs->bvec_pool, pool_size)) + goto bad; if (!(flags & BIOSET_NEED_RESCUER)) - return bs; + return 0; bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); if (!bs->rescue_workqueue) goto bad; - return bs; + return 0; bad: - bioset_free(bs); - return NULL; + bioset_exit(bs); + return -ENOMEM; } -EXPORT_SYMBOL(bioset_create); +EXPORT_SYMBOL(bioset_init); #ifdef CONFIG_BLK_CGROUP @@ -2020,11 +2050,10 @@ static int __init init_bio(void) bio_integrity_init(); biovec_init_slabs(); - fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!fs_bio_set) + if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS)) panic("bio: can't allocate bios\n"); - if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE)) + if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE)) panic("bio: can't create integrity pool\n"); return 0; diff --git a/block/blk-core.c b/block/blk-core.c index 85909b431eb0..3f56be15f17e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -196,15 +196,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) RB_CLEAR_NODE(&rq->rb_node); rq->tag = -1; rq->internal_tag = -1; - rq->start_time = jiffies; - set_start_time_ns(rq); + rq->start_time_ns = ktime_get_ns(); rq->part = NULL; - seqcount_init(&rq->gstate_seq); - u64_stats_init(&rq->aborted_gstate_sync); - /* - * See comment of blk_mq_init_request - */ - WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); } EXPORT_SYMBOL(blk_rq_init); @@ -280,6 +273,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ + /* + * XXX this code looks suspicious - it's not consistent with advancing + * req->bio in caller + */ if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) bio_endio(bio); } @@ -360,7 +357,6 @@ EXPORT_SYMBOL(blk_start_queue_async); void blk_start_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); - WARN_ON(!in_interrupt() && !irqs_disabled()); WARN_ON_ONCE(q->mq_ops); queue_flag_clear(QUEUE_FLAG_STOPPED, q); @@ -996,18 +992,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, spinlock_t *lock) { struct request_queue *q; + int ret; q = kmem_cache_alloc_node(blk_requestq_cachep, gfp_mask | __GFP_ZERO, node_id); if (!q) return NULL; + INIT_LIST_HEAD(&q->queue_head); + q->last_merge = NULL; + q->end_sector = 0; + q->boundary_rq = NULL; + q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); if (q->id < 0) goto fail_q; - q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!q->bio_split) + ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (ret) goto fail_id; q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); @@ -1079,7 +1081,7 @@ fail_bdi: fail_stats: bdi_put(q->backing_dev_info); fail_split: - bioset_free(q->bio_split); + bioset_exit(&q->bio_split); fail_id: ida_simple_remove(&blk_queue_ida, q->id); fail_q: @@ -1173,16 +1175,8 @@ int blk_init_allocated_queue(struct request_queue *q) q->sg_reserved_size = INT_MAX; - /* Protect q->elevator from elevator_change */ - mutex_lock(&q->sysfs_lock); - - /* init elevator */ - if (elevator_init(q, NULL)) { - mutex_unlock(&q->sysfs_lock); + if (elevator_init(q)) goto out_exit_flush_rq; - } - - mutex_unlock(&q->sysfs_lock); return 0; out_exit_flush_rq: @@ -1334,6 +1328,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr) * @op: operation and flags * @bio: bio to allocate request for (can be %NULL) * @flags: BLQ_MQ_REQ_* flags + * @gfp_mask: allocator flags * * Get a free request from @q. This function may fail under memory * pressure or if @q is dead. @@ -1343,7 +1338,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr) * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *__get_request(struct request_list *rl, unsigned int op, - struct bio *bio, blk_mq_req_flags_t flags) + struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask) { struct request_queue *q = rl->q; struct request *rq; @@ -1352,8 +1347,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op, struct io_cq *icq = NULL; const bool is_sync = op_is_sync(op); int may_queue; - gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : - __GFP_DIRECT_RECLAIM; req_flags_t rq_flags = RQF_ALLOCED; lockdep_assert_held(q->queue_lock); @@ -1517,8 +1510,9 @@ rq_starved: * @op: operation and flags * @bio: bio to allocate request for (can be %NULL) * @flags: BLK_MQ_REQ_* flags. + * @gfp: allocator flags * - * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask, + * Get a free request from @q. If %BLK_MQ_REQ_NOWAIT is set in @flags, * this function keeps retrying under memory pressure and fails iff @q is dead. * * Must be called with @q->queue_lock held and, @@ -1526,7 +1520,7 @@ rq_starved: * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *get_request(struct request_queue *q, unsigned int op, - struct bio *bio, blk_mq_req_flags_t flags) + struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp) { const bool is_sync = op_is_sync(op); DEFINE_WAIT(wait); @@ -1538,7 +1532,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op, rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: - rq = __get_request(rl, op, bio, flags); + rq = __get_request(rl, op, bio, flags, gfp); if (!IS_ERR(rq)) return rq; @@ -1579,8 +1573,7 @@ static struct request *blk_old_get_request(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags) { struct request *rq; - gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : - __GFP_DIRECT_RECLAIM; + gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO; int ret = 0; WARN_ON_ONCE(q->mq_ops); @@ -1592,7 +1585,7 @@ static struct request *blk_old_get_request(struct request_queue *q, if (ret) return ERR_PTR(ret); spin_lock_irq(q->queue_lock); - rq = get_request(q, op, NULL, flags); + rq = get_request(q, op, NULL, flags, gfp_mask); if (IS_ERR(rq)) { spin_unlock_irq(q->queue_lock); blk_queue_exit(q); @@ -1607,13 +1600,13 @@ static struct request *blk_old_get_request(struct request_queue *q, } /** - * blk_get_request_flags - allocate a request + * blk_get_request - allocate a request * @q: request queue to allocate a request for * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC. * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT. */ -struct request *blk_get_request_flags(struct request_queue *q, unsigned int op, - blk_mq_req_flags_t flags) +struct request *blk_get_request(struct request_queue *q, unsigned int op, + blk_mq_req_flags_t flags) { struct request *req; @@ -1632,14 +1625,6 @@ struct request *blk_get_request_flags(struct request_queue *q, unsigned int op, return req; } -EXPORT_SYMBOL(blk_get_request_flags); - -struct request *blk_get_request(struct request_queue *q, unsigned int op, - gfp_t gfp_mask) -{ - return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ? - 0 : BLK_MQ_REQ_NOWAIT); -} EXPORT_SYMBOL(blk_get_request); /** @@ -1660,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); - wbt_requeue(q->rq_wb, &rq->issue_stat); + wbt_requeue(q->rq_wb, rq); if (rq->rq_flags & RQF_QUEUED) blk_queue_end_tag(q, rq); @@ -1767,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) /* this is a bio leak */ WARN_ON(req->bio != NULL); - wbt_done(q->rq_wb, &req->issue_stat); + wbt_done(q->rq_wb, req); /* * Request may not have originated from ll_rw_blk. if not, @@ -2066,7 +2051,7 @@ get_rq: * Returns with the queue unlocked. */ blk_queue_enter_live(q); - req = get_request(q, bio->bi_opf, bio, 0); + req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); if (IS_ERR(req)) { blk_queue_exit(q); __wbt_done(q->rq_wb, wb_acct); @@ -2078,7 +2063,7 @@ get_rq: goto out_unlock; } - wbt_track(&req->issue_stat, wb_acct); + wbt_track(req, wb_acct); /* * After dropping the lock and possibly sleeping here, our request @@ -2392,7 +2377,9 @@ blk_qc_t generic_make_request(struct bio *bio) if (bio->bi_opf & REQ_NOWAIT) flags = BLK_MQ_REQ_NOWAIT; - if (blk_queue_enter(q, flags) < 0) { + if (bio_flagged(bio, BIO_QUEUE_ENTERED)) + blk_queue_enter_live(q); + else if (blk_queue_enter(q, flags) < 0) { if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT)) bio_wouldblock_error(bio); else @@ -2727,7 +2714,7 @@ void blk_account_io_completion(struct request *req, unsigned int bytes) } } -void blk_account_io_done(struct request *req) +void blk_account_io_done(struct request *req, u64 now) { /* * Account IO completion. flush_rq isn't accounted as a @@ -2735,11 +2722,12 @@ void blk_account_io_done(struct request *req) * containing request is enough. */ if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { - unsigned long duration = jiffies - req->start_time; + unsigned long duration; const int rw = rq_data_dir(req); struct hd_struct *part; int cpu; + duration = nsecs_to_jiffies(now - req->start_time_ns); cpu = part_stat_lock(); part = req->part; @@ -2970,10 +2958,8 @@ static void blk_dequeue_request(struct request *rq) * and to it is freed is accounted as io that is in progress at * the driver side. */ - if (blk_account_rq(rq)) { + if (blk_account_rq(rq)) q->in_flight[rq_is_sync(rq)]++; - set_io_start_time_ns(rq); - } } /** @@ -2992,9 +2978,12 @@ void blk_start_request(struct request *req) blk_dequeue_request(req); if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { - blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req)); + req->io_start_time_ns = ktime_get_ns(); +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + req->throtl_size = blk_rq_sectors(req); +#endif req->rq_flags |= RQF_STATS; - wbt_issue(req->q->rq_wb, &req->issue_stat); + wbt_issue(req->q->rq_wb, req); } BUG_ON(blk_rq_is_complete(req)); @@ -3092,8 +3081,10 @@ bool blk_update_request(struct request *req, blk_status_t error, struct bio *bio = req->bio; unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); - if (bio_bytes == bio->bi_iter.bi_size) + if (bio_bytes == bio->bi_iter.bi_size) { req->bio = bio->bi_next; + bio->bi_next = NULL; + } /* Completion has already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); @@ -3190,12 +3181,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); void blk_finish_request(struct request *req, blk_status_t error) { struct request_queue *q = req->q; + u64 now = ktime_get_ns(); lockdep_assert_held(req->q->queue_lock); WARN_ON_ONCE(q->mq_ops); if (req->rq_flags & RQF_STATS) - blk_stat_add(req); + blk_stat_add(req, now); if (req->rq_flags & RQF_QUEUED) blk_queue_end_tag(q, req); @@ -3210,10 +3202,10 @@ void blk_finish_request(struct request *req, blk_status_t error) if (req->rq_flags & RQF_DONTPREP) blk_unprep_request(req); - blk_account_io_done(req); + blk_account_io_done(req, now); if (req->end_io) { - wbt_done(req->q->rq_wb, &req->issue_stat); + wbt_done(req->q->rq_wb, req); req->end_io(req, error); } else { if (blk_bidi_rq(req)) @@ -3519,7 +3511,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio *bio, *bio_src; if (!bs) - bs = fs_bio_set; + bs = &fs_bio_set; __rq_for_each_bio(bio_src, rq_src) { bio = bio_clone_fast(bio_src, gfp_mask, bs); @@ -3630,7 +3622,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, blk_run_queue_async(q); else __blk_run_queue(q); - spin_unlock(q->queue_lock); + spin_unlock_irq(q->queue_lock); } static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) @@ -3678,7 +3670,6 @@ EXPORT_SYMBOL(blk_check_plugged); void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; - unsigned long flags; struct request *rq; LIST_HEAD(list); unsigned int depth; @@ -3698,11 +3689,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) q = NULL; depth = 0; - /* - * Save and disable interrupts here, to avoid doing it for every - * queue lock we have to take. - */ - local_irq_save(flags); while (!list_empty(&list)) { rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); @@ -3715,7 +3701,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) queue_unplugged(q, depth, from_schedule); q = rq->q; depth = 0; - spin_lock(q->queue_lock); + spin_lock_irq(q->queue_lock); } /* @@ -3742,8 +3728,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) */ if (q) queue_unplugged(q, depth, from_schedule); - - local_irq_restore(flags); } void blk_finish_plug(struct blk_plug *plug) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index feb30570eaf5..6121611e1316 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -333,34 +333,34 @@ static ssize_t integrity_device_show(struct blk_integrity *bi, char *page) } static struct integrity_sysfs_entry integrity_format_entry = { - .attr = { .name = "format", .mode = S_IRUGO }, + .attr = { .name = "format", .mode = 0444 }, .show = integrity_format_show, }; static struct integrity_sysfs_entry integrity_tag_size_entry = { - .attr = { .name = "tag_size", .mode = S_IRUGO }, + .attr = { .name = "tag_size", .mode = 0444 }, .show = integrity_tag_size_show, }; static struct integrity_sysfs_entry integrity_interval_entry = { - .attr = { .name = "protection_interval_bytes", .mode = S_IRUGO }, + .attr = { .name = "protection_interval_bytes", .mode = 0444 }, .show = integrity_interval_show, }; static struct integrity_sysfs_entry integrity_verify_entry = { - .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR }, + .attr = { .name = "read_verify", .mode = 0644 }, .show = integrity_verify_show, .store = integrity_verify_store, }; static struct integrity_sysfs_entry integrity_generate_entry = { - .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR }, + .attr = { .name = "write_generate", .mode = 0644 }, .show = integrity_generate_show, .store = integrity_generate_store, }; static struct integrity_sysfs_entry integrity_device_entry = { - .attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO }, + .attr = { .name = "device_is_integrity_capable", .mode = 0444 }, .show = integrity_device_show, }; diff --git a/block/blk-lib.c b/block/blk-lib.c index a676084d4740..8faa70f26fcd 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -62,10 +62,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, unsigned int req_sects; sector_t end_sect, tmp; - /* Make sure bi_size doesn't overflow */ - req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9); + /* + * Issue in chunks of the user defined max discard setting, + * ensuring that bi_size doesn't overflow + */ + req_sects = min_t(sector_t, nr_sects, + q->limits.max_discard_sectors); + if (req_sects > UINT_MAX >> 9) + req_sects = UINT_MAX >> 9; - /** + /* * If splitting a request, and the next starting sector would be * misaligned, stop the discard at the previous aligned sector. */ diff --git a/block/blk-merge.c b/block/blk-merge.c index 782940c65d8a..aaec38cc37b8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -188,16 +188,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio) switch (bio_op(*bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: - split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs); + split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs); break; case REQ_OP_WRITE_ZEROES: - split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs); + split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs); break; case REQ_OP_WRITE_SAME: - split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs); + split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs); break; default: - split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); + split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs); break; } @@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio) /* there isn't chance to merge the splitted bio */ split->bi_opf |= REQ_NOMERGE; + /* + * Since we're recursing into make_request here, ensure + * that we mark this bio as already having entered the queue. + * If not, and the queue is going away, we can get stuck + * forever on waiting for the queue reference to drop. But + * that will never happen, as we're already holding a + * reference to it. + */ + bio_set_flag(*bio, BIO_QUEUE_ENTERED); + bio_chain(split, *bio); trace_block_split(q, split, (*bio)->bi_iter.bi_sector); generic_make_request(*bio); @@ -724,13 +734,12 @@ static struct request *attempt_merge(struct request_queue *q, } /* - * At this point we have either done a back merge - * or front merge. We need the smaller start_time of - * the merged requests to be the current request - * for accounting purposes. + * At this point we have either done a back merge or front merge. We + * need the smaller start_time_ns of the merged requests to be the + * current request for accounting purposes. */ - if (time_after(req->start_time, next->start_time)) - req->start_time = next->start_time; + if (next->start_time_ns < req->start_time_ns) + req->start_time_ns = next->start_time_ns; req->biotail->bi_next = next->bio; req->biotail = next->biotail; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3080e18cb859..ffa622366922 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -344,7 +344,6 @@ static const char *const rqf_name[] = { RQF_NAME(STATS), RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(ZONE_WRITE_LOCKED), - RQF_NAME(MQ_TIMEOUT_EXPIRED), RQF_NAME(MQ_POLL_SLEPT), }; #undef RQF_NAME diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 25c14c58385c..56c493c6cd90 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -268,19 +268,16 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); /* - * Reverse check our software queue for entries that we could potentially - * merge with. Currently includes a hand-wavy stop count of 8, to not spend - * too much time checking for merges. + * Iterate list of requests and see if we can merge this bio with any + * of them. */ -static bool blk_mq_attempt_merge(struct request_queue *q, - struct blk_mq_ctx *ctx, struct bio *bio) +bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, + struct bio *bio) { struct request *rq; int checked = 8; - lockdep_assert_held(&ctx->lock); - - list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { + list_for_each_entry_reverse(rq, list, queuelist) { bool merged = false; if (!checked--) @@ -305,13 +302,30 @@ static bool blk_mq_attempt_merge(struct request_queue *q, continue; } - if (merged) - ctx->rq_merged++; return merged; } return false; } +EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge); + +/* + * Reverse check our software queue for entries that we could potentially + * merge with. Currently includes a hand-wavy stop count of 8, to not spend + * too much time checking for merges. + */ +static bool blk_mq_attempt_merge(struct request_queue *q, + struct blk_mq_ctx *ctx, struct bio *bio) +{ + lockdep_assert_held(&ctx->lock); + + if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) { + ctx->rq_merged++; + return true; + } + + return false; +} bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { @@ -571,6 +585,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (!e) { q->elevator = NULL; + q->nr_requests = q->tag_set->queue_depth; return 0; } @@ -633,14 +648,3 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) blk_mq_sched_tags_teardown(q); q->elevator = NULL; } - -int blk_mq_sched_init(struct request_queue *q) -{ - int ret; - - mutex_lock(&q->sysfs_lock); - ret = elevator_init(q, NULL); - mutex_unlock(&q->sysfs_lock); - - return ret; -} diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 1e9c9018ace1..0cb8f938dff9 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -33,8 +33,6 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx); -int blk_mq_sched_init(struct request_queue *q); - static inline bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index a54b4b070f1c..aafb44224c89 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -166,15 +166,15 @@ static struct attribute *default_ctx_attrs[] = { }; static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = { - .attr = {.name = "nr_tags", .mode = S_IRUGO }, + .attr = {.name = "nr_tags", .mode = 0444 }, .show = blk_mq_hw_sysfs_nr_tags_show, }; static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = { - .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO }, + .attr = {.name = "nr_reserved_tags", .mode = 0444 }, .show = blk_mq_hw_sysfs_nr_reserved_tags_show, }; static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { - .attr = {.name = "cpu_list", .mode = S_IRUGO }, + .attr = {.name = "cpu_list", .mode = 0444 }, .show = blk_mq_hw_sysfs_cpus_show, }; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 336dde07b230..70356a2a11ab 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) ws = bt_wait_ptr(bt, data->hctx); drop_ctx = data->ctx == NULL; do { + struct sbitmap_queue *bt_prev; + /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for @@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) if (data->ctx) blk_mq_put_ctx(data->ctx); + bt_prev = bt; io_schedule(); data->ctx = blk_mq_get_ctx(data->q); @@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) bt = &tags->bitmap_tags; finish_wait(&ws->wait, &wait); + + /* + * If destination hw queue is changed, fake wake up on + * previous queue for compensating the wake up miss, so + * other allocations on previous queue won't be starved. + */ + if (bt != bt_prev) + sbitmap_queue_wake_up(bt_prev); + ws = bt_wait_ptr(bt, data->hctx); } while (1); @@ -259,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * test and set the bit before assining ->rqs[]. */ rq = tags->rqs[bitnr]; - if (rq) + if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) iter_data->fn(rq, iter_data->data, reserved); return true; diff --git a/block/blk-mq.c b/block/blk-mq.c index 9ce9cac16c3f..6332940ca118 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -309,7 +309,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, RB_CLEAR_NODE(&rq->rb_node); rq->rq_disk = NULL; rq->part = NULL; - rq->start_time = jiffies; + rq->start_time_ns = ktime_get_ns(); + rq->io_start_time_ns = 0; rq->nr_phys_segments = 0; #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; @@ -328,11 +329,10 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, #ifdef CONFIG_BLK_CGROUP rq->rl = NULL; - set_start_time_ns(rq); - rq->io_start_time_ns = 0; #endif data->ctx->rq_dispatched[op_is_sync(op)]++; + refcount_set(&rq->ref, 1); return rq; } @@ -361,9 +361,11 @@ static struct request *blk_mq_get_request(struct request_queue *q, /* * Flush requests are special and go directly to the - * dispatch list. + * dispatch list. Don't include reserved tags in the + * limiting, as it isn't useful. */ - if (!op_is_flush(op) && e->type->ops.mq.limit_depth) + if (!op_is_flush(op) && e->type->ops.mq.limit_depth && + !(data->flags & BLK_MQ_REQ_RESERVED)) e->type->ops.mq.limit_depth(op, data); } @@ -464,13 +466,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); +static void __blk_mq_free_request(struct request *rq) +{ + struct request_queue *q = rq->q; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + const int sched_tag = rq->internal_tag; + + if (rq->tag != -1) + blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + if (sched_tag != -1) + blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); + blk_mq_sched_restart(hctx); + blk_queue_exit(q); +} + void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - const int sched_tag = rq->internal_tag; if (rq->rq_flags & RQF_ELVPRIV) { if (e && e->type->ops.mq.finish_request) @@ -488,27 +504,30 @@ void blk_mq_free_request(struct request *rq) if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->backing_dev_info); - wbt_done(q->rq_wb, &rq->issue_stat); + wbt_done(q->rq_wb, rq); if (blk_rq_rl(rq)) blk_put_rl(blk_rq_rl(rq)); - blk_mq_rq_update_state(rq, MQ_RQ_IDLE); - if (rq->tag != -1) - blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); - if (sched_tag != -1) - blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); - blk_mq_sched_restart(hctx); - blk_queue_exit(q); + WRITE_ONCE(rq->state, MQ_RQ_IDLE); + if (refcount_dec_and_test(&rq->ref)) + __blk_mq_free_request(rq); } EXPORT_SYMBOL_GPL(blk_mq_free_request); inline void __blk_mq_end_request(struct request *rq, blk_status_t error) { - blk_account_io_done(rq); + u64 now = ktime_get_ns(); + + if (rq->rq_flags & RQF_STATS) { + blk_mq_poll_stats_start(rq->q); + blk_stat_add(rq, now); + } + + blk_account_io_done(rq, now); if (rq->end_io) { - wbt_done(rq->q->rq_wb, &rq->issue_stat); + wbt_done(rq->q->rq_wb, rq); rq->end_io(rq, error); } else { if (unlikely(blk_bidi_rq(rq))) @@ -539,15 +558,12 @@ static void __blk_mq_complete_request(struct request *rq) bool shared = false; int cpu; - WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT); - blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE); + if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) != + MQ_RQ_IN_FLIGHT) + return; if (rq->internal_tag != -1) blk_mq_sched_completed_request(rq); - if (rq->rq_flags & RQF_STATS) { - blk_mq_poll_stats_start(rq->q); - blk_stat_add(rq); - } if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { rq->q->softirq_done_fn(rq); @@ -589,36 +605,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) *srcu_idx = srcu_read_lock(hctx->srcu); } -static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate) -{ - unsigned long flags; - - /* - * blk_mq_rq_aborted_gstate() is used from the completion path and - * can thus be called from irq context. u64_stats_fetch in the - * middle of update on the same CPU leads to lockup. Disable irq - * while updating. - */ - local_irq_save(flags); - u64_stats_update_begin(&rq->aborted_gstate_sync); - rq->aborted_gstate = gstate; - u64_stats_update_end(&rq->aborted_gstate_sync); - local_irq_restore(flags); -} - -static u64 blk_mq_rq_aborted_gstate(struct request *rq) -{ - unsigned int start; - u64 aborted_gstate; - - do { - start = u64_stats_fetch_begin(&rq->aborted_gstate_sync); - aborted_gstate = rq->aborted_gstate; - } while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start)); - - return aborted_gstate; -} - /** * blk_mq_complete_request - end I/O on a request * @rq: the request being processed @@ -629,28 +615,9 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq) **/ void blk_mq_complete_request(struct request *rq) { - struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); - int srcu_idx; - - if (unlikely(blk_should_fake_timeout(q))) + if (unlikely(blk_should_fake_timeout(rq->q))) return; - - /* - * If @rq->aborted_gstate equals the current instance, timeout is - * claiming @rq and we lost. This is synchronized through - * hctx_lock(). See blk_mq_timeout_work() for details. - * - * Completion path never blocks and we can directly use RCU here - * instead of hctx_lock() which can be either RCU or SRCU. - * However, that would complicate paths which want to synchronize - * against us. Let stay in sync with the issue path so that - * hctx_lock() covers both issue and completion paths. - */ - hctx_lock(hctx, &srcu_idx); - if (blk_mq_rq_aborted_gstate(rq) != rq->gstate) - __blk_mq_complete_request(rq); - hctx_unlock(hctx, srcu_idx); + __blk_mq_complete_request(rq); } EXPORT_SYMBOL(blk_mq_complete_request); @@ -669,32 +636,18 @@ void blk_mq_start_request(struct request *rq) trace_block_rq_issue(q, rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { - blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq)); + rq->io_start_time_ns = ktime_get_ns(); +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + rq->throtl_size = blk_rq_sectors(rq); +#endif rq->rq_flags |= RQF_STATS; - wbt_issue(q->rq_wb, &rq->issue_stat); + wbt_issue(q->rq_wb, rq); } WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); - /* - * Mark @rq in-flight which also advances the generation number, - * and register for timeout. Protect with a seqcount to allow the - * timeout path to read both @rq->gstate and @rq->deadline - * coherently. - * - * This is the only place where a request is marked in-flight. If - * the timeout path reads an in-flight @rq->gstate, the - * @rq->deadline it reads together under @rq->gstate_seq is - * guaranteed to be the matching one. - */ - preempt_disable(); - write_seqcount_begin(&rq->gstate_seq); - - blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT); blk_add_timer(rq); - - write_seqcount_end(&rq->gstate_seq); - preempt_enable(); + WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); if (q->dma_drain_size && blk_rq_bytes(rq)) { /* @@ -707,11 +660,6 @@ void blk_mq_start_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_start_request); -/* - * When we reach here because queue is busy, it's safe to change the state - * to IDLE without checking @rq->aborted_gstate because we should still be - * holding the RCU read lock and thus protected against timeout. - */ static void __blk_mq_requeue_request(struct request *rq) { struct request_queue *q = rq->q; @@ -719,10 +667,10 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); trace_block_rq_requeue(q, rq); - wbt_requeue(q->rq_wb, &rq->issue_stat); + wbt_requeue(q->rq_wb, rq); - if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) { - blk_mq_rq_update_state(rq, MQ_RQ_IDLE); + if (blk_mq_request_started(rq)) { + WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (q->dma_drain_size && blk_rq_bytes(rq)) rq->nr_phys_segments--; } @@ -820,101 +768,79 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) } EXPORT_SYMBOL(blk_mq_tag_to_rq); -struct blk_mq_timeout_data { - unsigned long next; - unsigned int next_set; - unsigned int nr_expired; -}; - static void blk_mq_rq_timed_out(struct request *req, bool reserved) { - const struct blk_mq_ops *ops = req->q->mq_ops; - enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; - - req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED; - - if (ops->timeout) - ret = ops->timeout(req, reserved); + if (req->q->mq_ops->timeout) { + enum blk_eh_timer_return ret; - switch (ret) { - case BLK_EH_HANDLED: - __blk_mq_complete_request(req); - break; - case BLK_EH_RESET_TIMER: - /* - * As nothing prevents from completion happening while - * ->aborted_gstate is set, this may lead to ignored - * completions and further spurious timeouts. - */ - blk_mq_rq_update_aborted_gstate(req, 0); - blk_add_timer(req); - break; - case BLK_EH_NOT_HANDLED: - break; - default: - printk(KERN_ERR "block: bad eh return: %d\n", ret); - break; + ret = req->q->mq_ops->timeout(req, reserved); + if (ret == BLK_EH_DONE) + return; + WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); } + + blk_add_timer(req); } -static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, - struct request *rq, void *priv, bool reserved) +static bool blk_mq_req_expired(struct request *rq, unsigned long *next) { - struct blk_mq_timeout_data *data = priv; - unsigned long gstate, deadline; - int start; + unsigned long deadline; - might_sleep(); - - if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) - return; + if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) + return false; - /* read coherent snapshots of @rq->state_gen and @rq->deadline */ - while (true) { - start = read_seqcount_begin(&rq->gstate_seq); - gstate = READ_ONCE(rq->gstate); - deadline = blk_rq_deadline(rq); - if (!read_seqcount_retry(&rq->gstate_seq, start)) - break; - cond_resched(); - } + deadline = blk_rq_deadline(rq); + if (time_after_eq(jiffies, deadline)) + return true; - /* if in-flight && overdue, mark for abortion */ - if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT && - time_after_eq(jiffies, deadline)) { - blk_mq_rq_update_aborted_gstate(rq, gstate); - data->nr_expired++; - hctx->nr_expired++; - } else if (!data->next_set || time_after(data->next, deadline)) { - data->next = deadline; - data->next_set = 1; - } + if (*next == 0) + *next = deadline; + else if (time_after(*next, deadline)) + *next = deadline; + return false; } -static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx, +static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { + unsigned long *next = priv; + /* - * We marked @rq->aborted_gstate and waited for RCU. If there were - * completions that we lost to, they would have finished and - * updated @rq->gstate by now; otherwise, the completion path is - * now guaranteed to see @rq->aborted_gstate and yield. If - * @rq->aborted_gstate still matches @rq->gstate, @rq is ours. + * Just do a quick check if it is expired before locking the request in + * so we're not unnecessarilly synchronizing across CPUs. + */ + if (!blk_mq_req_expired(rq, next)) + return; + + /* + * We have reason to believe the request may be expired. Take a + * reference on the request to lock this request lifetime into its + * currently allocated context to prevent it from being reallocated in + * the event the completion by-passes this timeout handler. + * + * If the reference was already released, then the driver beat the + * timeout handler to posting a natural completion. */ - if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) && - READ_ONCE(rq->gstate) == rq->aborted_gstate) + if (!refcount_inc_not_zero(&rq->ref)) + return; + + /* + * The request is now locked and cannot be reallocated underneath the + * timeout handler's processing. Re-verify this exact request is truly + * expired; if it is not expired, then the request was completed and + * reallocated as a new request. + */ + if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); + if (refcount_dec_and_test(&rq->ref)) + __blk_mq_free_request(rq); } static void blk_mq_timeout_work(struct work_struct *work) { struct request_queue *q = container_of(work, struct request_queue, timeout_work); - struct blk_mq_timeout_data data = { - .next = 0, - .next_set = 0, - .nr_expired = 0, - }; + unsigned long next = 0; struct blk_mq_hw_ctx *hctx; int i; @@ -934,39 +860,10 @@ static void blk_mq_timeout_work(struct work_struct *work) if (!percpu_ref_tryget(&q->q_usage_counter)) return; - /* scan for the expired ones and set their ->aborted_gstate */ - blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data); - - if (data.nr_expired) { - bool has_rcu = false; - - /* - * Wait till everyone sees ->aborted_gstate. The - * sequential waits for SRCUs aren't ideal. If this ever - * becomes a problem, we can add per-hw_ctx rcu_head and - * wait in parallel. - */ - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->nr_expired) - continue; - - if (!(hctx->flags & BLK_MQ_F_BLOCKING)) - has_rcu = true; - else - synchronize_srcu(hctx->srcu); - - hctx->nr_expired = 0; - } - if (has_rcu) - synchronize_rcu(); - - /* terminate the ones we won */ - blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL); - } + blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next); - if (data.next_set) { - data.next = blk_rq_timeout(round_jiffies_up(data.next)); - mod_timer(&q->timeout, data.next); + if (next != 0) { + mod_timer(&q->timeout, next); } else { /* * Request timeouts are handled as a forward rolling timer. If @@ -1029,7 +926,7 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; spin_lock(&ctx->lock); - if (unlikely(!list_empty(&ctx->rq_list))) { + if (!list_empty(&ctx->rq_list)) { dispatch_data->rq = list_entry_rq(ctx->rq_list.next); list_del_init(&dispatch_data->rq->queuelist); if (list_empty(&ctx->rq_list)) @@ -1716,15 +1613,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) blk_account_io_start(rq, true); } -static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, - struct request *rq) -{ - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, false); - spin_unlock(&ctx->lock); -} - static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) { if (rq->tag != -1) @@ -1882,7 +1770,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } - wbt_track(&rq->issue_stat, wb_acct); + wbt_track(rq, wb_acct); cookie = request_to_qc_t(data.hctx, rq); @@ -1949,15 +1837,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); blk_mq_try_issue_directly(data.hctx, rq, &cookie); - } else if (q->elevator) { - blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); - blk_mq_sched_insert_request(rq, false, true, true); } else { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - blk_mq_queue_io(data.hctx, data.ctx, rq); - blk_mq_run_hw_queue(data.hctx, true); + blk_mq_sched_insert_request(rq, false, true, true); } return cookie; @@ -2056,15 +1939,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, return ret; } - seqcount_init(&rq->gstate_seq); - u64_stats_init(&rq->aborted_gstate_sync); - /* - * start gstate with gen 1 instead of 0, otherwise it will be equal - * to aborted_gstate, and be identified timed out by - * blk_mq_terminate_expired. - */ - WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC); - + WRITE_ONCE(rq->state, MQ_RQ_IDLE); return 0; } @@ -2365,6 +2240,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { cpumask_clear(hctx->cpumask); hctx->nr_ctx = 0; + hctx->dispatch_from = NULL; } /* @@ -2697,7 +2573,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!(set->flags & BLK_MQ_F_NO_SCHED)) { int ret; - ret = blk_mq_sched_init(q); + ret = elevator_init_mq(q); if (ret) return ERR_PTR(ret); } diff --git a/block/blk-mq.h b/block/blk-mq.h index e1bb420dc5d6..89231e439b2f 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -30,20 +30,6 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; -/* - * Bits for request->gstate. The lower two bits carry MQ_RQ_* state value - * and the upper bits the generation number. - */ -enum mq_rq_state { - MQ_RQ_IDLE = 0, - MQ_RQ_IN_FLIGHT = 1, - MQ_RQ_COMPLETE = 2, - - MQ_RQ_STATE_BITS = 2, - MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1, - MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS, -}; - void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); @@ -107,33 +93,9 @@ void blk_mq_release(struct request_queue *q); * blk_mq_rq_state() - read the current MQ_RQ_* state of a request * @rq: target request. */ -static inline int blk_mq_rq_state(struct request *rq) +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) { - return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK; -} - -/** - * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request - * @rq: target request. - * @state: new state to set. - * - * Set @rq's state to @state. The caller is responsible for ensuring that - * there are no other updaters. A request can transition into IN_FLIGHT - * only from IDLE and doing so increments the generation number. - */ -static inline void blk_mq_rq_update_state(struct request *rq, - enum mq_rq_state state) -{ - u64 old_val = READ_ONCE(rq->gstate); - u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state; - - if (state == MQ_RQ_IN_FLIGHT) { - WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE); - new_val += MQ_RQ_GEN_INC; - } - - /* avoid exposing interim values */ - WRITE_ONCE(rq->gstate, new_val); + return READ_ONCE(rq->state); } static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, diff --git a/block/blk-stat.c b/block/blk-stat.c index bd365a95fcf8..175c143ac5b9 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -47,19 +47,15 @@ static void __blk_stat_add(struct blk_rq_stat *stat, u64 value) stat->nr_samples++; } -void blk_stat_add(struct request *rq) +void blk_stat_add(struct request *rq, u64 now) { struct request_queue *q = rq->q; struct blk_stat_callback *cb; struct blk_rq_stat *stat; int bucket; - u64 now, value; + u64 value; - now = __blk_stat_time(ktime_to_ns(ktime_get())); - if (now < blk_stat_time(&rq->issue_stat)) - return; - - value = now - blk_stat_time(&rq->issue_stat); + value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0; blk_throtl_stat_add(rq, value); diff --git a/block/blk-stat.h b/block/blk-stat.h index 2dd36347252a..78399cdde9c9 100644 --- a/block/blk-stat.h +++ b/block/blk-stat.h @@ -8,21 +8,6 @@ #include <linux/rcupdate.h> #include <linux/timer.h> -/* - * from upper: - * 3 bits: reserved for other usage - * 12 bits: size - * 49 bits: time - */ -#define BLK_STAT_RES_BITS 3 -#define BLK_STAT_SIZE_BITS 12 -#define BLK_STAT_RES_SHIFT (64 - BLK_STAT_RES_BITS) -#define BLK_STAT_SIZE_SHIFT (BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS) -#define BLK_STAT_TIME_MASK ((1ULL << BLK_STAT_SIZE_SHIFT) - 1) -#define BLK_STAT_SIZE_MASK \ - (((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT) -#define BLK_STAT_RES_MASK (~((1ULL << BLK_STAT_RES_SHIFT) - 1)) - /** * struct blk_stat_callback - Block statistics callback. * @@ -80,35 +65,7 @@ struct blk_stat_callback { struct blk_queue_stats *blk_alloc_queue_stats(void); void blk_free_queue_stats(struct blk_queue_stats *); -void blk_stat_add(struct request *); - -static inline u64 __blk_stat_time(u64 time) -{ - return time & BLK_STAT_TIME_MASK; -} - -static inline u64 blk_stat_time(struct blk_issue_stat *stat) -{ - return __blk_stat_time(stat->stat); -} - -static inline sector_t blk_capped_size(sector_t size) -{ - return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1); -} - -static inline sector_t blk_stat_size(struct blk_issue_stat *stat) -{ - return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT; -} - -static inline void blk_stat_set_issue(struct blk_issue_stat *stat, - sector_t size) -{ - stat->stat = (stat->stat & BLK_STAT_RES_MASK) | - (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) | - (((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT); -} +void blk_stat_add(struct request *rq, u64 now); /* record time/size info in request but not add a callback */ void blk_stat_enable_accounting(struct request_queue *q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d00d1b0ec109..94987b1f69e1 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -491,188 +491,198 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page, return count; } +static ssize_t queue_fua_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags)); +} + static ssize_t queue_dax_show(struct request_queue *q, char *page) { return queue_var_show(blk_queue_dax(q), page); } static struct queue_sysfs_entry queue_requests_entry = { - .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "nr_requests", .mode = 0644 }, .show = queue_requests_show, .store = queue_requests_store, }; static struct queue_sysfs_entry queue_ra_entry = { - .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "read_ahead_kb", .mode = 0644 }, .show = queue_ra_show, .store = queue_ra_store, }; static struct queue_sysfs_entry queue_max_sectors_entry = { - .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "max_sectors_kb", .mode = 0644 }, .show = queue_max_sectors_show, .store = queue_max_sectors_store, }; static struct queue_sysfs_entry queue_max_hw_sectors_entry = { - .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, + .attr = {.name = "max_hw_sectors_kb", .mode = 0444 }, .show = queue_max_hw_sectors_show, }; static struct queue_sysfs_entry queue_max_segments_entry = { - .attr = {.name = "max_segments", .mode = S_IRUGO }, + .attr = {.name = "max_segments", .mode = 0444 }, .show = queue_max_segments_show, }; static struct queue_sysfs_entry queue_max_discard_segments_entry = { - .attr = {.name = "max_discard_segments", .mode = S_IRUGO }, + .attr = {.name = "max_discard_segments", .mode = 0444 }, .show = queue_max_discard_segments_show, }; static struct queue_sysfs_entry queue_max_integrity_segments_entry = { - .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, + .attr = {.name = "max_integrity_segments", .mode = 0444 }, .show = queue_max_integrity_segments_show, }; static struct queue_sysfs_entry queue_max_segment_size_entry = { - .attr = {.name = "max_segment_size", .mode = S_IRUGO }, + .attr = {.name = "max_segment_size", .mode = 0444 }, .show = queue_max_segment_size_show, }; static struct queue_sysfs_entry queue_iosched_entry = { - .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "scheduler", .mode = 0644 }, .show = elv_iosched_show, .store = elv_iosched_store, }; static struct queue_sysfs_entry queue_hw_sector_size_entry = { - .attr = {.name = "hw_sector_size", .mode = S_IRUGO }, + .attr = {.name = "hw_sector_size", .mode = 0444 }, .show = queue_logical_block_size_show, }; static struct queue_sysfs_entry queue_logical_block_size_entry = { - .attr = {.name = "logical_block_size", .mode = S_IRUGO }, + .attr = {.name = "logical_block_size", .mode = 0444 }, .show = queue_logical_block_size_show, }; static struct queue_sysfs_entry queue_physical_block_size_entry = { - .attr = {.name = "physical_block_size", .mode = S_IRUGO }, + .attr = {.name = "physical_block_size", .mode = 0444 }, .show = queue_physical_block_size_show, }; static struct queue_sysfs_entry queue_chunk_sectors_entry = { - .attr = {.name = "chunk_sectors", .mode = S_IRUGO }, + .attr = {.name = "chunk_sectors", .mode = 0444 }, .show = queue_chunk_sectors_show, }; static struct queue_sysfs_entry queue_io_min_entry = { - .attr = {.name = "minimum_io_size", .mode = S_IRUGO }, + .attr = {.name = "minimum_io_size", .mode = 0444 }, .show = queue_io_min_show, }; static struct queue_sysfs_entry queue_io_opt_entry = { - .attr = {.name = "optimal_io_size", .mode = S_IRUGO }, + .attr = {.name = "optimal_io_size", .mode = 0444 }, .show = queue_io_opt_show, }; static struct queue_sysfs_entry queue_discard_granularity_entry = { - .attr = {.name = "discard_granularity", .mode = S_IRUGO }, + .attr = {.name = "discard_granularity", .mode = 0444 }, .show = queue_discard_granularity_show, }; static struct queue_sysfs_entry queue_discard_max_hw_entry = { - .attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO }, + .attr = {.name = "discard_max_hw_bytes", .mode = 0444 }, .show = queue_discard_max_hw_show, }; static struct queue_sysfs_entry queue_discard_max_entry = { - .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "discard_max_bytes", .mode = 0644 }, .show = queue_discard_max_show, .store = queue_discard_max_store, }; static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { - .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO }, + .attr = {.name = "discard_zeroes_data", .mode = 0444 }, .show = queue_discard_zeroes_data_show, }; static struct queue_sysfs_entry queue_write_same_max_entry = { - .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, + .attr = {.name = "write_same_max_bytes", .mode = 0444 }, .show = queue_write_same_max_show, }; static struct queue_sysfs_entry queue_write_zeroes_max_entry = { - .attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO }, + .attr = {.name = "write_zeroes_max_bytes", .mode = 0444 }, .show = queue_write_zeroes_max_show, }; static struct queue_sysfs_entry queue_nonrot_entry = { - .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "rotational", .mode = 0644 }, .show = queue_show_nonrot, .store = queue_store_nonrot, }; static struct queue_sysfs_entry queue_zoned_entry = { - .attr = {.name = "zoned", .mode = S_IRUGO }, + .attr = {.name = "zoned", .mode = 0444 }, .show = queue_zoned_show, }; static struct queue_sysfs_entry queue_nomerges_entry = { - .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "nomerges", .mode = 0644 }, .show = queue_nomerges_show, .store = queue_nomerges_store, }; static struct queue_sysfs_entry queue_rq_affinity_entry = { - .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "rq_affinity", .mode = 0644 }, .show = queue_rq_affinity_show, .store = queue_rq_affinity_store, }; static struct queue_sysfs_entry queue_iostats_entry = { - .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "iostats", .mode = 0644 }, .show = queue_show_iostats, .store = queue_store_iostats, }; static struct queue_sysfs_entry queue_random_entry = { - .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "add_random", .mode = 0644 }, .show = queue_show_random, .store = queue_store_random, }; static struct queue_sysfs_entry queue_poll_entry = { - .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "io_poll", .mode = 0644 }, .show = queue_poll_show, .store = queue_poll_store, }; static struct queue_sysfs_entry queue_poll_delay_entry = { - .attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "io_poll_delay", .mode = 0644 }, .show = queue_poll_delay_show, .store = queue_poll_delay_store, }; static struct queue_sysfs_entry queue_wc_entry = { - .attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "write_cache", .mode = 0644 }, .show = queue_wc_show, .store = queue_wc_store, }; +static struct queue_sysfs_entry queue_fua_entry = { + .attr = {.name = "fua", .mode = 0444 }, + .show = queue_fua_show, +}; + static struct queue_sysfs_entry queue_dax_entry = { - .attr = {.name = "dax", .mode = S_IRUGO }, + .attr = {.name = "dax", .mode = 0444 }, .show = queue_dax_show, }; static struct queue_sysfs_entry queue_wb_lat_entry = { - .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "wbt_lat_usec", .mode = 0644 }, .show = queue_wb_lat_show, .store = queue_wb_lat_store, }; #ifdef CONFIG_BLK_DEV_THROTTLING_LOW static struct queue_sysfs_entry throtl_sample_time_entry = { - .attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "throttle_sample_time", .mode = 0644 }, .show = blk_throtl_sample_time_show, .store = blk_throtl_sample_time_store, }; @@ -708,6 +718,7 @@ static struct attribute *default_attrs[] = { &queue_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, + &queue_fua_entry.attr, &queue_dax_entry.attr, &queue_wb_lat_entry.attr, &queue_poll_delay_entry.attr, @@ -813,8 +824,7 @@ static void __blk_release_queue(struct work_struct *work) if (q->mq_ops) blk_mq_debugfs_unregister(q); - if (q->bio_split) - bioset_free(q->bio_split); + bioset_exit(&q->bio_split); ida_simple_remove(&blk_queue_ida, q->id); call_rcu(&q->rcu_head, blk_free_queue_rcu); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c5a131673733..82282e6fdcf8 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -36,8 +36,6 @@ static int throtl_quantum = 32; */ #define LATENCY_FILTERED_HD (1000L) /* 1ms */ -#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) - static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ @@ -821,7 +819,7 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw) if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) return false; - return 1; + return true; } /* Trim the used slices and adjust slice start accordingly */ @@ -931,7 +929,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio, if (wait) *wait = jiffy_wait; - return 0; + return false; } static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, @@ -974,7 +972,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed); if (wait) *wait = jiffy_wait; - return 0; + return false; } /* @@ -1024,7 +1022,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, tg_with_in_iops_limit(tg, bio, &iops_wait)) { if (wait) *wait = 0; - return 1; + return true; } max_wait = max(bps_wait, iops_wait); @@ -1035,7 +1033,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, if (time_before(tg->slice_end[rw], jiffies + max_wait)) throtl_extend_slice(tg, rw, jiffies + max_wait); - return 0; + return false; } static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) @@ -1209,7 +1207,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq) while (1) { struct throtl_grp *tg = throtl_rb_first(parent_sq); - struct throtl_service_queue *sq = &tg->service_queue; + struct throtl_service_queue *sq; if (!tg) break; @@ -1221,6 +1219,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq) nr_disp += throtl_dispatch_tg(tg); + sq = &tg->service_queue; if (sq->nr_queued[0] || sq->nr_queued[1]) tg_update_disptime(tg); @@ -2139,7 +2138,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) bio->bi_cg_private = tg; blkg_get(tg_to_blkg(tg)); } - blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio)); + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } @@ -2251,7 +2250,7 @@ out: #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) - bio->bi_issue_stat.stat |= SKIP_LATENCY; + bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY; #endif return throttled; } @@ -2281,8 +2280,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns) struct request_queue *q = rq->q; struct throtl_data *td = q->td; - throtl_track_latency(td, blk_stat_size(&rq->issue_stat), - req_op(rq), time_ns >> 10); + throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10); } void blk_throtl_bio_endio(struct bio *bio) @@ -2302,8 +2300,8 @@ void blk_throtl_bio_endio(struct bio *bio) finish_time_ns = ktime_get_ns(); tg->last_finish_time = finish_time_ns >> 10; - start_time = blk_stat_time(&bio->bi_issue_stat) >> 10; - finish_time = __blk_stat_time(finish_time_ns) >> 10; + start_time = bio_issue_time(&bio->bi_issue) >> 10; + finish_time = __bio_issue_time(finish_time_ns) >> 10; if (!start_time || finish_time <= start_time) { blkg_put(tg_to_blkg(tg)); return; @@ -2311,16 +2309,15 @@ void blk_throtl_bio_endio(struct bio *bio) lat = finish_time - start_time; /* this is only for bio based driver */ - if (!(bio->bi_issue_stat.stat & SKIP_LATENCY)) - throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat), - bio_op(bio), lat); + if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY)) + throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue), + bio_op(bio), lat); if (tg->latency_target && lat >= tg->td->filtered_latency) { int bucket; unsigned int threshold; - bucket = request_bucket_index( - blk_stat_size(&bio->bi_issue_stat)); + bucket = request_bucket_index(bio_issue_size(&bio->bi_issue)); threshold = tg->td->avg_buckets[rw][bucket].latency + tg->latency_target; if (lat > threshold) diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 652d4d4d3e97..4b8a48d48ba1 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -86,14 +86,11 @@ static void blk_rq_timed_out(struct request *req) if (q->rq_timed_out_fn) ret = q->rq_timed_out_fn(req); switch (ret) { - case BLK_EH_HANDLED: - __blk_complete_request(req); - break; case BLK_EH_RESET_TIMER: blk_add_timer(req); blk_clear_rq_complete(req); break; - case BLK_EH_NOT_HANDLED: + case BLK_EH_DONE: /* * LLD handles this for now but in the future * we can send a request msg to abort the command @@ -214,7 +211,6 @@ void blk_add_timer(struct request *req) req->timeout = q->rq_timeout; blk_rq_set_deadline(req, jiffies + req->timeout); - req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED; /* * Only the non-mq case needs to add the request to a protected list. diff --git a/block/blk-wbt.c b/block/blk-wbt.c index f92fc84b5e2c..4f89b28fa652 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -29,6 +29,26 @@ #define CREATE_TRACE_POINTS #include <trace/events/wbt.h> +static inline void wbt_clear_state(struct request *rq) +{ + rq->wbt_flags = 0; +} + +static inline enum wbt_flags wbt_flags(struct request *rq) +{ + return rq->wbt_flags; +} + +static inline bool wbt_is_tracked(struct request *rq) +{ + return rq->wbt_flags & WBT_TRACKED; +} + +static inline bool wbt_is_read(struct request *rq) +{ + return rq->wbt_flags & WBT_READ; +} + enum { /* * Default setting, we'll scale up (to 75% of QD max) or down (min 1) @@ -101,9 +121,15 @@ static bool wb_recent_wait(struct rq_wb *rwb) return time_before(jiffies, wb->dirty_sleep + HZ); } -static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd) +static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, + enum wbt_flags wb_acct) { - return &rwb->rq_wait[is_kswapd]; + if (wb_acct & WBT_KSWAPD) + return &rwb->rq_wait[WBT_RWQ_KSWAPD]; + else if (wb_acct & WBT_DISCARD) + return &rwb->rq_wait[WBT_RWQ_DISCARD]; + + return &rwb->rq_wait[WBT_RWQ_BG]; } static void rwb_wake_all(struct rq_wb *rwb) @@ -126,7 +152,7 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) if (!(wb_acct & WBT_TRACKED)) return; - rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD); + rqw = get_rq_wait(rwb, wb_acct); inflight = atomic_dec_return(&rqw->inflight); /* @@ -139,10 +165,13 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) } /* - * If the device does write back caching, drop further down - * before we wake people up. + * For discards, our limit is always the background. For writes, if + * the device does write back caching, drop further down before we + * wake people up. */ - if (rwb->wc && !wb_recent_wait(rwb)) + if (wb_acct & WBT_DISCARD) + limit = rwb->wb_background; + else if (rwb->wc && !wb_recent_wait(rwb)) limit = 0; else limit = rwb->wb_normal; @@ -165,24 +194,24 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct) * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. */ -void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) +void wbt_done(struct rq_wb *rwb, struct request *rq) { if (!rwb) return; - if (!wbt_is_tracked(stat)) { - if (rwb->sync_cookie == stat) { + if (!wbt_is_tracked(rq)) { + if (rwb->sync_cookie == rq) { rwb->sync_issue = 0; rwb->sync_cookie = NULL; } - if (wbt_is_read(stat)) + if (wbt_is_read(rq)) wb_timestamp(rwb, &rwb->last_comp); } else { - WARN_ON_ONCE(stat == rwb->sync_cookie); - __wbt_done(rwb, wbt_stat_to_mask(stat)); + WARN_ON_ONCE(rq == rwb->sync_cookie); + __wbt_done(rwb, wbt_flags(rq)); } - wbt_clear_state(stat); + wbt_clear_state(rq); } /* @@ -479,6 +508,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) { unsigned int limit; + if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD) + return rwb->wb_background; + /* * At this point we know it's a buffered write. If this is * kswapd trying to free memory, or REQ_SYNC is set, then @@ -529,11 +561,12 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw, * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. */ -static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock) +static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, + unsigned long rw, spinlock_t *lock) __releases(lock) __acquires(lock) { - struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd()); + struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); DEFINE_WAIT(wait); if (may_queue(rwb, rqw, &wait, rw)) @@ -559,21 +592,20 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock) static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) { - const int op = bio_op(bio); - - /* - * If not a WRITE, do nothing - */ - if (op != REQ_OP_WRITE) - return false; - - /* - * Don't throttle WRITE_ODIRECT - */ - if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE)) + switch (bio_op(bio)) { + case REQ_OP_WRITE: + /* + * Don't throttle WRITE_ODIRECT + */ + if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == + (REQ_SYNC | REQ_IDLE)) + return false; + /* fallthrough */ + case REQ_OP_DISCARD: + return true; + default: return false; - - return true; + } } /* @@ -584,7 +616,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) */ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) { - unsigned int ret = 0; + enum wbt_flags ret = 0; if (!rwb_enabled(rwb)) return 0; @@ -598,41 +630,42 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock) return ret; } - __wbt_wait(rwb, bio->bi_opf, lock); + if (current_is_kswapd()) + ret |= WBT_KSWAPD; + if (bio_op(bio) == REQ_OP_DISCARD) + ret |= WBT_DISCARD; + + __wbt_wait(rwb, ret, bio->bi_opf, lock); if (!blk_stat_is_active(rwb->cb)) rwb_arm_timer(rwb); - if (current_is_kswapd()) - ret |= WBT_KSWAPD; - return ret | WBT_TRACKED; } -void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat) +void wbt_issue(struct rq_wb *rwb, struct request *rq) { if (!rwb_enabled(rwb)) return; /* - * Track sync issue, in case it takes a long time to complete. Allows - * us to react quicker, if a sync IO takes a long time to complete. - * Note that this is just a hint. 'stat' can go away when the - * request completes, so it's important we never dereference it. We - * only use the address to compare with, which is why we store the - * sync_issue time locally. + * Track sync issue, in case it takes a long time to complete. Allows us + * to react quicker, if a sync IO takes a long time to complete. Note + * that this is just a hint. The request can go away when it completes, + * so it's important we never dereference it. We only use the address to + * compare with, which is why we store the sync_issue time locally. */ - if (wbt_is_read(stat) && !rwb->sync_issue) { - rwb->sync_cookie = stat; - rwb->sync_issue = blk_stat_time(stat); + if (wbt_is_read(rq) && !rwb->sync_issue) { + rwb->sync_cookie = rq; + rwb->sync_issue = rq->io_start_time_ns; } } -void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat) +void wbt_requeue(struct rq_wb *rwb, struct request *rq) { if (!rwb_enabled(rwb)) return; - if (stat == rwb->sync_cookie) { + if (rq == rwb->sync_cookie) { rwb->sync_issue = 0; rwb->sync_cookie = NULL; } @@ -701,7 +734,7 @@ static int wbt_data_dir(const struct request *rq) if (op == REQ_OP_READ) return READ; - else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH) + else if (op_is_write(op)) return WRITE; /* don't account */ @@ -713,8 +746,6 @@ int wbt_init(struct request_queue *q) struct rq_wb *rwb; int i; - BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS); - rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); if (!rwb) return -ENOMEM; diff --git a/block/blk-wbt.h b/block/blk-wbt.h index a232c98fbf4d..300df531d0a6 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -14,12 +14,16 @@ enum wbt_flags { WBT_TRACKED = 1, /* write, tracked for throttling */ WBT_READ = 2, /* read */ WBT_KSWAPD = 4, /* write, from kswapd */ + WBT_DISCARD = 8, /* discard */ - WBT_NR_BITS = 3, /* number of bits */ + WBT_NR_BITS = 4, /* number of bits */ }; enum { - WBT_NUM_RWQ = 2, + WBT_RWQ_BG = 0, + WBT_RWQ_KSWAPD, + WBT_RWQ_DISCARD, + WBT_NUM_RWQ, }; /* @@ -31,31 +35,6 @@ enum { WBT_STATE_ON_MANUAL = 2, }; -static inline void wbt_clear_state(struct blk_issue_stat *stat) -{ - stat->stat &= ~BLK_STAT_RES_MASK; -} - -static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat) -{ - return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT; -} - -static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct) -{ - stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT; -} - -static inline bool wbt_is_tracked(struct blk_issue_stat *stat) -{ - return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED; -} - -static inline bool wbt_is_read(struct blk_issue_stat *stat) -{ - return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ; -} - struct rq_wait { wait_queue_head_t wait; atomic_t inflight; @@ -84,7 +63,7 @@ struct rq_wb { struct blk_stat_callback *cb; - s64 sync_issue; + u64 sync_issue; void *sync_cookie; unsigned int wc; @@ -109,14 +88,19 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb) #ifdef CONFIG_BLK_WBT +static inline void wbt_track(struct request *rq, enum wbt_flags flags) +{ + rq->wbt_flags |= flags; +} + void __wbt_done(struct rq_wb *, enum wbt_flags); -void wbt_done(struct rq_wb *, struct blk_issue_stat *); +void wbt_done(struct rq_wb *, struct request *); enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *); int wbt_init(struct request_queue *); void wbt_exit(struct request_queue *); void wbt_update_limits(struct rq_wb *); -void wbt_requeue(struct rq_wb *, struct blk_issue_stat *); -void wbt_issue(struct rq_wb *, struct blk_issue_stat *); +void wbt_requeue(struct rq_wb *, struct request *); +void wbt_issue(struct rq_wb *, struct request *); void wbt_disable_default(struct request_queue *); void wbt_enable_default(struct request_queue *); @@ -127,10 +111,13 @@ u64 wbt_default_latency_nsec(struct request_queue *); #else +static inline void wbt_track(struct request *rq, enum wbt_flags flags) +{ +} static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags) { } -static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) +static inline void wbt_done(struct rq_wb *rwb, struct request *rq) { } static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, @@ -148,10 +135,10 @@ static inline void wbt_exit(struct request_queue *q) static inline void wbt_update_limits(struct rq_wb *rwb) { } -static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat) +static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq) { } -static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat) +static inline void wbt_issue(struct rq_wb *rwb, struct request *rq) { } static inline void wbt_disable_default(struct request_queue *q) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 08e84ef2bc05..3d08dc84db16 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -328,7 +328,11 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!rep.nr_zones) return -EINVAL; - zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL); + if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone)) + return -ERANGE; + + zones = kvmalloc(rep.nr_zones * sizeof(struct blk_zone), + GFP_KERNEL | __GFP_ZERO); if (!zones) return -ENOMEM; @@ -350,7 +354,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, } out: - kfree(zones); + kvfree(zones); return ret; } diff --git a/block/blk.h b/block/blk.h index b034fd2460c4..8d23aea96ce9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -186,7 +186,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); -void blk_account_io_done(struct request *req); +void blk_account_io_done(struct request *req, u64 now); /* * EH timer and IO completion will both attempt to 'grab' the request, make @@ -231,6 +231,9 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq e->type->ops.sq.elevator_deactivate_req_fn(q, rq); } +int elevator_init(struct request_queue *); +int elevator_init_mq(struct request_queue *q); +void elevator_exit(struct request_queue *, struct elevator_queue *); int elv_register_queue(struct request_queue *q); void elv_unregister_queue(struct request_queue *q); diff --git a/block/bounce.c b/block/bounce.c index dd0b93f2a871..fd31347b7836 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -28,28 +28,29 @@ #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 -static struct bio_set *bounce_bio_set, *bounce_bio_split; -static mempool_t *page_pool, *isa_page_pool; +static struct bio_set bounce_bio_set, bounce_bio_split; +static mempool_t page_pool, isa_page_pool; #if defined(CONFIG_HIGHMEM) static __init int init_emergency_pool(void) { + int ret; #if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG) if (max_pfn <= max_low_pfn) return 0; #endif - page_pool = mempool_create_page_pool(POOL_SIZE, 0); - BUG_ON(!page_pool); + ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0); + BUG_ON(ret); pr_info("pool size: %d pages\n", POOL_SIZE); - bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - BUG_ON(!bounce_bio_set); - if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE)) + ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + BUG_ON(ret); + if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) BUG_ON(1); - bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); - BUG_ON(!bounce_bio_split); + ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); + BUG_ON(ret); return 0; } @@ -63,14 +64,11 @@ __initcall(init_emergency_pool); */ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) { - unsigned long flags; unsigned char *vto; - local_irq_save(flags); vto = kmap_atomic(to->bv_page); memcpy(vto + to->bv_offset, vfrom, to->bv_len); kunmap_atomic(vto); - local_irq_restore(flags); } #else /* CONFIG_HIGHMEM */ @@ -94,12 +92,14 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) */ int init_emergency_isa_pool(void) { - if (isa_page_pool) + int ret; + + if (mempool_initialized(&isa_page_pool)) return 0; - isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, - mempool_free_pages, (void *) 0); - BUG_ON(!isa_page_pool); + ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa, + mempool_free_pages, (void *) 0); + BUG_ON(ret); pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE); return 0; @@ -166,13 +166,13 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) static void bounce_end_io_write(struct bio *bio) { - bounce_end_io(bio, page_pool); + bounce_end_io(bio, &page_pool); } static void bounce_end_io_write_isa(struct bio *bio) { - bounce_end_io(bio, isa_page_pool); + bounce_end_io(bio, &isa_page_pool); } static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) @@ -187,12 +187,12 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) static void bounce_end_io_read(struct bio *bio) { - __bounce_end_io_read(bio, page_pool); + __bounce_end_io_read(bio, &page_pool); } static void bounce_end_io_read_isa(struct bio *bio) { - __bounce_end_io_read(bio, isa_page_pool); + __bounce_end_io_read(bio, &isa_page_pool); } static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, @@ -217,13 +217,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, return; if (!passthrough && sectors < bio_sectors(*bio_orig)) { - bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); + bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split); bio_chain(bio, *bio_orig); generic_make_request(*bio_orig); *bio_orig = bio; } bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL : - bounce_bio_set); + &bounce_bio_set); bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; @@ -250,7 +250,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, bio->bi_flags |= (1 << BIO_BOUNCED); - if (pool == page_pool) { + if (pool == &page_pool) { bio->bi_end_io = bounce_end_io_write; if (rw == READ) bio->bi_end_io = bounce_end_io_read; @@ -282,10 +282,10 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) if (!(q->bounce_gfp & GFP_DMA)) { if (q->limits.bounce_pfn >= blk_max_pfn) return; - pool = page_pool; + pool = &page_pool; } else { - BUG_ON(!isa_page_pool); - pool = isa_page_pool; + BUG_ON(!mempool_initialized(&isa_page_pool)); + pool = &isa_page_pool; } /* diff --git a/block/bsg-lib.c b/block/bsg-lib.c index fc2e5ff2c4b9..9419def8c017 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -303,11 +303,9 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req) * @name: device to give bsg device * @job_fn: bsg job handler * @dd_job_size: size of LLD data needed for each job - * @release: @dev release function */ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size, - void (*release)(struct device *)) + bsg_job_fn *job_fn, int dd_job_size) { struct request_queue *q; int ret; @@ -331,7 +329,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); - ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release); + ret = bsg_register_queue(q, dev, name, &bsg_transport_ops); if (ret) { printk(KERN_ERR "%s: bsg interface failed to " "initialize - register queue\n", dev->kobj.name); diff --git a/block/bsg.c b/block/bsg.c index defa06c11858..132e657e2d91 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -226,8 +226,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode) return ERR_PTR(ret); rq = blk_get_request(q, hdr->dout_xfer_len ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, - GFP_KERNEL); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) return rq; @@ -249,7 +248,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode) goto out; } - next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); + next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); if (IS_ERR(next_rq)) { ret = PTR_ERR(next_rq); goto out; @@ -650,18 +649,6 @@ static struct bsg_device *bsg_alloc_device(void) return bd; } -static void bsg_kref_release_function(struct kref *kref) -{ - struct bsg_class_device *bcd = - container_of(kref, struct bsg_class_device, ref); - struct device *parent = bcd->parent; - - if (bcd->release) - bcd->release(bcd->parent); - - put_device(parent); -} - static int bsg_put_device(struct bsg_device *bd) { int ret = 0, do_free; @@ -694,7 +681,6 @@ static int bsg_put_device(struct bsg_device *bd) kfree(bd); out: - kref_put(&q->bsg_dev.ref, bsg_kref_release_function); if (do_free) blk_put_queue(q); return ret; @@ -760,8 +746,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) */ mutex_lock(&bsg_mutex); bcd = idr_find(&bsg_minor_idr, iminor(inode)); - if (bcd) - kref_get(&bcd->ref); mutex_unlock(&bsg_mutex); if (!bcd) @@ -772,8 +756,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) return bd; bd = bsg_add_device(inode, bcd->queue, file); - if (IS_ERR(bd)) - kref_put(&bcd->ref, bsg_kref_release_function); return bd; } @@ -913,25 +895,17 @@ void bsg_unregister_queue(struct request_queue *q) sysfs_remove_link(&q->kobj, "bsg"); device_unregister(bcd->class_dev); bcd->class_dev = NULL; - kref_put(&bcd->ref, bsg_kref_release_function); mutex_unlock(&bsg_mutex); } EXPORT_SYMBOL_GPL(bsg_unregister_queue); int bsg_register_queue(struct request_queue *q, struct device *parent, - const char *name, const struct bsg_ops *ops, - void (*release)(struct device *)) + const char *name, const struct bsg_ops *ops) { struct bsg_class_device *bcd; dev_t dev; int ret; struct device *class_dev = NULL; - const char *devname; - - if (name) - devname = name; - else - devname = dev_name(parent); /* * we need a proper transport to send commands, not a stacked device @@ -955,15 +929,12 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, bcd->minor = ret; bcd->queue = q; - bcd->parent = get_device(parent); - bcd->release = release; bcd->ops = ops; - kref_init(&bcd->ref); dev = MKDEV(bsg_major, bcd->minor); - class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname); + class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name); if (IS_ERR(class_dev)) { ret = PTR_ERR(class_dev); - goto put_dev; + goto idr_remove; } bcd->class_dev = class_dev; @@ -978,8 +949,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, unregister_class_dev: device_unregister(class_dev); -put_dev: - put_device(parent); +idr_remove: idr_remove(&bsg_minor_idr, bcd->minor); unlock: mutex_unlock(&bsg_mutex); @@ -993,7 +963,7 @@ int bsg_scsi_register_queue(struct request_queue *q, struct device *parent) return -EINVAL; } - return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL); + return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops); } EXPORT_SYMBOL_GPL(bsg_scsi_register_queue); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9f342ef1ad42..82b6c27b3245 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -210,9 +210,9 @@ struct cfqg_stats { /* total time with empty current active q with other requests queued */ struct blkg_stat empty_time; /* fields after this shouldn't be cleared on stat reset */ - uint64_t start_group_wait_time; - uint64_t start_idle_time; - uint64_t start_empty_time; + u64 start_group_wait_time; + u64 start_idle_time; + u64 start_empty_time; uint16_t flags; #endif /* CONFIG_DEBUG_BLK_CGROUP */ #endif /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -491,13 +491,13 @@ CFQG_FLAG_FNS(empty) /* This should be called with the queue_lock held. */ static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats) { - unsigned long long now; + u64 now; if (!cfqg_stats_waiting(stats)) return; - now = sched_clock(); - if (time_after64(now, stats->start_group_wait_time)) + now = ktime_get_ns(); + if (now > stats->start_group_wait_time) blkg_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); cfqg_stats_clear_waiting(stats); @@ -513,20 +513,20 @@ static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, return; if (cfqg == curr_cfqg) return; - stats->start_group_wait_time = sched_clock(); + stats->start_group_wait_time = ktime_get_ns(); cfqg_stats_mark_waiting(stats); } /* This should be called with the queue_lock held. */ static void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { - unsigned long long now; + u64 now; if (!cfqg_stats_empty(stats)) return; - now = sched_clock(); - if (time_after64(now, stats->start_empty_time)) + now = ktime_get_ns(); + if (now > stats->start_empty_time) blkg_stat_add(&stats->empty_time, now - stats->start_empty_time); cfqg_stats_clear_empty(stats); @@ -552,7 +552,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) if (cfqg_stats_empty(stats)) return; - stats->start_empty_time = sched_clock(); + stats->start_empty_time = ktime_get_ns(); cfqg_stats_mark_empty(stats); } @@ -561,9 +561,9 @@ static void cfqg_stats_update_idle_time(struct cfq_group *cfqg) struct cfqg_stats *stats = &cfqg->stats; if (cfqg_stats_idling(stats)) { - unsigned long long now = sched_clock(); + u64 now = ktime_get_ns(); - if (time_after64(now, stats->start_idle_time)) + if (now > stats->start_idle_time) blkg_stat_add(&stats->idle_time, now - stats->start_idle_time); cfqg_stats_clear_idling(stats); @@ -576,7 +576,7 @@ static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) BUG_ON(cfqg_stats_idling(stats)); - stats->start_idle_time = sched_clock(); + stats->start_idle_time = ktime_get_ns(); cfqg_stats_mark_idling(stats); } @@ -701,17 +701,19 @@ static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, - unsigned int op) + u64 start_time_ns, + u64 io_start_time_ns, + unsigned int op) { struct cfqg_stats *stats = &cfqg->stats; - unsigned long long now = sched_clock(); + u64 now = ktime_get_ns(); - if (time_after64(now, io_start_time)) - blkg_rwstat_add(&stats->service_time, op, now - io_start_time); - if (time_after64(io_start_time, start_time)) + if (now > io_start_time_ns) + blkg_rwstat_add(&stats->service_time, op, + now - io_start_time_ns); + if (io_start_time_ns > start_time_ns) blkg_rwstat_add(&stats->wait_time, op, - io_start_time - start_time); + io_start_time_ns - start_time_ns); } /* @stats = 0 */ @@ -797,8 +799,9 @@ static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, unsigned int op) { } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, - unsigned int op) { } + u64 start_time_ns, + u64 io_start_time_ns, + unsigned int op) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -4225,8 +4228,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqd->rq_in_driver--; cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; - cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), - rq_io_start_time_ns(rq), rq->cmd_flags); + cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns, + rq->io_start_time_ns, rq->cmd_flags); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; @@ -4242,16 +4245,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq_type(cfqq)); st->ttime.last_end_request = now; - /* - * We have to do this check in jiffies since start_time is in - * jiffies and it is not trivial to convert to ns. If - * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test - * will become problematic but so far we are fine (the default - * is 128 ms). - */ - if (!time_after(rq->start_time + - nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]), - jiffies)) + if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now) cfqd->last_delayed_sync = now; } @@ -4792,7 +4786,7 @@ USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, U #undef USEC_STORE_FUNCTION #define CFQ_ATTR(name) \ - __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) + __ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store) static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(quantum), diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 9de9f156e203..ef2f1f09e9b3 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -512,8 +512,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); #undef STORE_FUNCTION #define DD_ATTR(name) \ - __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \ - deadline_##name##_store) + __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) static struct elv_fs_entry deadline_attrs[] = { DD_ATTR(read_expire), diff --git a/block/elevator.c b/block/elevator.c index e87e9b43aba0..fa828b5bfd4b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -199,76 +199,46 @@ static void elevator_release(struct kobject *kobj) kfree(e); } -int elevator_init(struct request_queue *q, char *name) +/* + * Use the default elevator specified by config boot param for non-mq devices, + * or by config option. Don't try to load modules as we could be running off + * async and request_module() isn't allowed from async. + */ +int elevator_init(struct request_queue *q) { struct elevator_type *e = NULL; - int err; + int err = 0; /* * q->sysfs_lock must be held to provide mutual exclusion between * elevator_switch() and here. */ - lockdep_assert_held(&q->sysfs_lock); - + mutex_lock(&q->sysfs_lock); if (unlikely(q->elevator)) - return 0; - - INIT_LIST_HEAD(&q->queue_head); - q->last_merge = NULL; - q->end_sector = 0; - q->boundary_rq = NULL; - - if (name) { - e = elevator_get(q, name, true); - if (!e) - return -EINVAL; - } + goto out_unlock; - /* - * Use the default elevator specified by config boot param for - * non-mq devices, or by config option. Don't try to load modules - * as we could be running off async and request_module() isn't - * allowed from async. - */ - if (!e && !q->mq_ops && *chosen_elevator) { + if (*chosen_elevator) { e = elevator_get(q, chosen_elevator, false); if (!e) printk(KERN_ERR "I/O scheduler %s not found\n", chosen_elevator); } + if (!e) + e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false); if (!e) { - /* - * For blk-mq devices, we default to using mq-deadline, - * if available, for single queue devices. If deadline - * isn't available OR we have multiple queues, default - * to "none". - */ - if (q->mq_ops) { - if (q->nr_hw_queues == 1) - e = elevator_get(q, "mq-deadline", false); - if (!e) - return 0; - } else - e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false); - - if (!e) { - printk(KERN_ERR - "Default I/O scheduler not found. " \ - "Using noop.\n"); - e = elevator_get(q, "noop", false); - } + printk(KERN_ERR + "Default I/O scheduler not found. Using noop.\n"); + e = elevator_get(q, "noop", false); } - if (e->uses_mq) - err = blk_mq_init_sched(q, e); - else - err = e->ops.sq.elevator_init_fn(q, e); + err = e->ops.sq.elevator_init_fn(q, e); if (err) elevator_put(e); +out_unlock: + mutex_unlock(&q->sysfs_lock); return err; } -EXPORT_SYMBOL(elevator_init); void elevator_exit(struct request_queue *q, struct elevator_queue *e) { @@ -281,7 +251,6 @@ void elevator_exit(struct request_queue *q, struct elevator_queue *e) kobject_put(&e->kobj); } -EXPORT_SYMBOL(elevator_exit); static inline void __elv_rqhash_del(struct request *rq) { @@ -1005,6 +974,40 @@ out: } /* + * For blk-mq devices, we default to using mq-deadline, if available, for single + * queue devices. If deadline isn't available OR we have multiple queues, + * default to "none". + */ +int elevator_init_mq(struct request_queue *q) +{ + struct elevator_type *e; + int err = 0; + + if (q->nr_hw_queues != 1) + return 0; + + /* + * q->sysfs_lock must be held to provide mutual exclusion between + * elevator_switch() and here. + */ + mutex_lock(&q->sysfs_lock); + if (unlikely(q->elevator)) + goto out_unlock; + + e = elevator_get(q, "mq-deadline", false); + if (!e) + goto out_unlock; + + err = blk_mq_init_sched(q, e); + if (err) + elevator_put(e); +out_unlock: + mutex_unlock(&q->sysfs_lock); + return err; +} + + +/* * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we * need for the new one. this way we have a chance of going back to the old diff --git a/block/genhd.c b/block/genhd.c index c4513fe1adda..f1543a45e73b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1027,18 +1027,6 @@ static const struct seq_operations partitions_op = { .stop = disk_seqf_stop, .show = show_partition }; - -static int partitions_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &partitions_op); -} - -static const struct file_operations proc_partitions_operations = { - .open = partitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif @@ -1139,28 +1127,25 @@ static ssize_t disk_discard_alignment_show(struct device *dev, return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); } -static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); -static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); -static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); -static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL); -static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); -static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); -static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); -static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show, - NULL); -static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); -static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); -static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); -static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show, - disk_badblocks_store); +static DEVICE_ATTR(range, 0444, disk_range_show, NULL); +static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL); +static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL); +static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL); +static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL); +static DEVICE_ATTR(size, 0444, part_size_show, NULL); +static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL); +static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL); +static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL); +static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); +static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); +static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = - __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); + __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); #endif #ifdef CONFIG_FAIL_IO_TIMEOUT static struct device_attribute dev_attr_fail_timeout = - __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, - part_timeout_store); + __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store); #endif static struct attribute *disk_attrs[] = { @@ -1377,22 +1362,10 @@ static const struct seq_operations diskstats_op = { .show = diskstats_show }; -static int diskstats_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &diskstats_op); -} - -static const struct file_operations proc_diskstats_operations = { - .open = diskstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_genhd_init(void) { - proc_create("diskstats", 0, NULL, &proc_diskstats_operations); - proc_create("partitions", 0, NULL, &proc_partitions_operations); + proc_create_seq("diskstats", 0, NULL, &diskstats_op); + proc_create_seq("partitions", 0, NULL, &partitions_op); return 0; } module_init(proc_genhd_init); @@ -1924,9 +1897,9 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev, return count; } -static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); -static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); -static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, +static const DEVICE_ATTR(events, 0444, disk_events_show, NULL); +static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL); +static const DEVICE_ATTR(events_poll_msecs, 0644, disk_events_poll_msecs_show, disk_events_poll_msecs_store); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 0d6d25e32e1f..a1660bafc912 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -72,6 +72,19 @@ static const unsigned int kyber_batch_size[] = { [KYBER_OTHER] = 8, }; +/* + * There is a same mapping between ctx & hctx and kcq & khd, + * we use request->mq_ctx->index_hw to index the kcq in khd. + */ +struct kyber_ctx_queue { + /* + * Used to ensure operations on rq_list and kcq_map to be an atmoic one. + * Also protect the rqs on rq_list when merge. + */ + spinlock_t lock; + struct list_head rq_list[KYBER_NUM_DOMAINS]; +} ____cacheline_aligned_in_smp; + struct kyber_queue_data { struct request_queue *q; @@ -99,6 +112,8 @@ struct kyber_hctx_data { struct list_head rqs[KYBER_NUM_DOMAINS]; unsigned int cur_domain; unsigned int batching; + struct kyber_ctx_queue *kcqs; + struct sbitmap kcq_map[KYBER_NUM_DOMAINS]; wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS]; atomic_t wait_index[KYBER_NUM_DOMAINS]; @@ -107,10 +122,8 @@ struct kyber_hctx_data { static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); -static int rq_sched_domain(const struct request *rq) +static unsigned int kyber_sched_domain(unsigned int op) { - unsigned int op = rq->cmd_flags; - if ((op & REQ_OP_MASK) == REQ_OP_READ) return KYBER_READ; else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op)) @@ -284,6 +297,11 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd) return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; } +static int kyber_bucket_fn(const struct request *rq) +{ + return kyber_sched_domain(rq->cmd_flags); +} + static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) { struct kyber_queue_data *kqd; @@ -297,7 +315,7 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) goto err; kqd->q = q; - kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain, + kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn, KYBER_NUM_DOMAINS, kqd); if (!kqd->cb) goto err_kqd; @@ -376,8 +394,18 @@ static void kyber_exit_sched(struct elevator_queue *e) kfree(kqd); } +static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq) +{ + unsigned int i; + + spin_lock_init(&kcq->lock); + for (i = 0; i < KYBER_NUM_DOMAINS; i++) + INIT_LIST_HEAD(&kcq->rq_list[i]); +} + static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; struct kyber_hctx_data *khd; int i; @@ -385,6 +413,24 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) if (!khd) return -ENOMEM; + khd->kcqs = kmalloc_array_node(hctx->nr_ctx, + sizeof(struct kyber_ctx_queue), + GFP_KERNEL, hctx->numa_node); + if (!khd->kcqs) + goto err_khd; + + for (i = 0; i < hctx->nr_ctx; i++) + kyber_ctx_queue_init(&khd->kcqs[i]); + + for (i = 0; i < KYBER_NUM_DOMAINS; i++) { + if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx, + ilog2(8), GFP_KERNEL, hctx->numa_node)) { + while (--i >= 0) + sbitmap_free(&khd->kcq_map[i]); + goto err_kcqs; + } + } + spin_lock_init(&khd->lock); for (i = 0; i < KYBER_NUM_DOMAINS; i++) { @@ -400,12 +446,26 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) khd->batching = 0; hctx->sched_data = khd; + sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags, + kqd->async_depth); return 0; + +err_kcqs: + kfree(khd->kcqs); +err_khd: + kfree(khd); + return -ENOMEM; } static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + struct kyber_hctx_data *khd = hctx->sched_data; + int i; + + for (i = 0; i < KYBER_NUM_DOMAINS; i++) + sbitmap_free(&khd->kcq_map[i]); + kfree(khd->kcqs); kfree(hctx->sched_data); } @@ -427,7 +487,7 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd, nr = rq_get_domain_token(rq); if (nr != -1) { - sched_domain = rq_sched_domain(rq); + sched_domain = kyber_sched_domain(rq->cmd_flags); sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr, rq->mq_ctx->cpu); } @@ -446,11 +506,51 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) } } +static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +{ + struct kyber_hctx_data *khd = hctx->sched_data; + struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); + struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw]; + unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); + struct list_head *rq_list = &kcq->rq_list[sched_domain]; + bool merged; + + spin_lock(&kcq->lock); + merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio); + spin_unlock(&kcq->lock); + blk_mq_put_ctx(ctx); + + return merged; +} + static void kyber_prepare_request(struct request *rq, struct bio *bio) { rq_set_domain_token(rq, -1); } +static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, + struct list_head *rq_list, bool at_head) +{ + struct kyber_hctx_data *khd = hctx->sched_data; + struct request *rq, *next; + + list_for_each_entry_safe(rq, next, rq_list, queuelist) { + unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags); + struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw]; + struct list_head *head = &kcq->rq_list[sched_domain]; + + spin_lock(&kcq->lock); + if (at_head) + list_move(&rq->queuelist, head); + else + list_move_tail(&rq->queuelist, head); + sbitmap_set_bit(&khd->kcq_map[sched_domain], + rq->mq_ctx->index_hw); + blk_mq_sched_request_inserted(rq); + spin_unlock(&kcq->lock); + } +} + static void kyber_finish_request(struct request *rq) { struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; @@ -469,7 +569,7 @@ static void kyber_completed_request(struct request *rq) * Check if this request met our latency goal. If not, quickly gather * some statistics and start throttling. */ - sched_domain = rq_sched_domain(rq); + sched_domain = kyber_sched_domain(rq->cmd_flags); switch (sched_domain) { case KYBER_READ: target = kqd->read_lat_nsec; @@ -485,29 +585,48 @@ static void kyber_completed_request(struct request *rq) if (blk_stat_is_active(kqd->cb)) return; - now = __blk_stat_time(ktime_to_ns(ktime_get())); - if (now < blk_stat_time(&rq->issue_stat)) + now = ktime_get_ns(); + if (now < rq->io_start_time_ns) return; - latency = now - blk_stat_time(&rq->issue_stat); + latency = now - rq->io_start_time_ns; if (latency > target) blk_stat_activate_msecs(kqd->cb, 10); } -static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd, - struct blk_mq_hw_ctx *hctx) +struct flush_kcq_data { + struct kyber_hctx_data *khd; + unsigned int sched_domain; + struct list_head *list; +}; + +static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data) { - LIST_HEAD(rq_list); - struct request *rq, *next; + struct flush_kcq_data *flush_data = data; + struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr]; - blk_mq_flush_busy_ctxs(hctx, &rq_list); - list_for_each_entry_safe(rq, next, &rq_list, queuelist) { - unsigned int sched_domain; + spin_lock(&kcq->lock); + list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain], + flush_data->list); + sbitmap_clear_bit(sb, bitnr); + spin_unlock(&kcq->lock); - sched_domain = rq_sched_domain(rq); - list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]); - } + return true; +} + +static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd, + unsigned int sched_domain, + struct list_head *list) +{ + struct flush_kcq_data data = { + .khd = khd, + .sched_domain = sched_domain, + .list = list, + }; + + sbitmap_for_each_set(&khd->kcq_map[sched_domain], + flush_busy_kcq, &data); } static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, @@ -570,26 +689,23 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, static struct request * kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, struct kyber_hctx_data *khd, - struct blk_mq_hw_ctx *hctx, - bool *flushed) + struct blk_mq_hw_ctx *hctx) { struct list_head *rqs; struct request *rq; int nr; rqs = &khd->rqs[khd->cur_domain]; - rq = list_first_entry_or_null(rqs, struct request, queuelist); /* - * If there wasn't already a pending request and we haven't flushed the - * software queues yet, flush the software queues and check again. + * If we already have a flushed request, then we just need to get a + * token for it. Otherwise, if there are pending requests in the kcqs, + * flush the kcqs, but only if we can get a token. If not, we should + * leave the requests in the kcqs so that they can be merged. Note that + * khd->lock serializes the flushes, so if we observed any bit set in + * the kcq_map, we will always get a request. */ - if (!rq && !*flushed) { - kyber_flush_busy_ctxs(khd, hctx); - *flushed = true; - rq = list_first_entry_or_null(rqs, struct request, queuelist); - } - + rq = list_first_entry_or_null(rqs, struct request, queuelist); if (rq) { nr = kyber_get_domain_token(kqd, khd, hctx); if (nr >= 0) { @@ -598,6 +714,16 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, list_del_init(&rq->queuelist); return rq; } + } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { + nr = kyber_get_domain_token(kqd, khd, hctx); + if (nr >= 0) { + kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs); + rq = list_first_entry(rqs, struct request, queuelist); + khd->batching++; + rq_set_domain_token(rq, nr); + list_del_init(&rq->queuelist); + return rq; + } } /* There were either no pending requests or no tokens. */ @@ -608,7 +734,6 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx) { struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; struct kyber_hctx_data *khd = hctx->sched_data; - bool flushed = false; struct request *rq; int i; @@ -619,7 +744,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx) * from the batch. */ if (khd->batching < kyber_batch_size[khd->cur_domain]) { - rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed); + rq = kyber_dispatch_cur_domain(kqd, khd, hctx); if (rq) goto out; } @@ -640,7 +765,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx) else khd->cur_domain++; - rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed); + rq = kyber_dispatch_cur_domain(kqd, khd, hctx); if (rq) goto out; } @@ -657,10 +782,12 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx) int i; for (i = 0; i < KYBER_NUM_DOMAINS; i++) { - if (!list_empty_careful(&khd->rqs[i])) + if (!list_empty_careful(&khd->rqs[i]) || + sbitmap_any_bit_set(&khd->kcq_map[i])) return true; } - return sbitmap_any_bit_set(&hctx->ctx_map); + + return false; } #define KYBER_LAT_SHOW_STORE(op) \ @@ -831,7 +958,9 @@ static struct elevator_type kyber_sched = { .init_hctx = kyber_init_hctx, .exit_hctx = kyber_exit_hctx, .limit_depth = kyber_limit_depth, + .bio_merge = kyber_bio_merge, .prepare_request = kyber_prepare_request, + .insert_requests = kyber_insert_requests, .finish_request = kyber_finish_request, .requeue_request = kyber_finish_request, .completed_request = kyber_completed_request, diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 8ec0ba9f5386..099a9e05854c 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -630,8 +630,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); #undef STORE_FUNCTION #define DD_ATTR(name) \ - __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \ - deadline_##name##_store) + __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) static struct elv_fs_entry deadline_attrs[] = { DD_ATTR(read_expire), diff --git a/block/partition-generic.c b/block/partition-generic.c index db57cced9b98..3dcfd4ec0e11 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -179,18 +179,17 @@ ssize_t part_fail_store(struct device *dev, } #endif -static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); -static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); -static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); -static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); -static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); -static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, - NULL); -static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); -static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); +static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); +static DEVICE_ATTR(start, 0444, part_start_show, NULL); +static DEVICE_ATTR(size, 0444, part_size_show, NULL); +static DEVICE_ATTR(ro, 0444, part_ro_show, NULL); +static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL); +static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); +static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); +static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = - __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); + __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); #endif static struct attribute *part_attrs[] = { @@ -291,8 +290,7 @@ static ssize_t whole_disk_show(struct device *dev, { return 0; } -static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, - whole_disk_show, NULL); +static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); /* * Must be called either with bd_mutex held, before a disk can be opened or @@ -518,7 +516,7 @@ rescan: if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); - check_disk_size_change(disk, bdev); + check_disk_size_change(disk, bdev, true); bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; @@ -643,7 +641,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) return res; set_capacity(disk, 0); - check_disk_size_change(disk, bdev); + check_disk_size_change(disk, bdev, false); bdev->bd_invalidated = 0; /* tell userspace that the media / partition table may have changed */ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 60b471f8621b..533f4aee8567 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -321,8 +321,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, at_head = 1; ret = -ENOMEM; - rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, - GFP_KERNEL); + rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); req = scsi_req(rq); @@ -449,8 +448,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } - rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, - __GFP_RECLAIM); + rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; @@ -501,7 +499,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, break; } - if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) { + if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO)) { err = DRIVER_ERROR << 24; goto error; } @@ -538,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, struct request *rq; int err; - rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM); + rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0); if (IS_ERR(rq)) return PTR_ERR(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; diff --git a/crypto/proc.c b/crypto/proc.c index 822fcef6d91c..f4eb6139973e 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -94,21 +94,9 @@ static const struct seq_operations crypto_seq_ops = { .show = c_show }; -static int crypto_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &crypto_seq_ops); -} - -static const struct file_operations proc_crypto_ops = { - .open = crypto_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - void __init crypto_init_proc(void) { - proc_create("crypto", 0, NULL, &proc_crypto_ops); + proc_create_seq("crypto", 0, NULL, &crypto_seq_ops); } void __exit crypto_exit_proc(void) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 2d8de2f8c1ed..84fdfa70920a 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -82,7 +82,6 @@ static SIMPLE_DEV_PM_OPS(acpi_ac_pm, NULL, acpi_ac_resume); #ifdef CONFIG_ACPI_PROCFS_POWER extern struct proc_dir_entry *acpi_lock_ac_dir(void); extern void *acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir); -static int acpi_ac_open_fs(struct inode *inode, struct file *file); #endif @@ -111,16 +110,6 @@ struct acpi_ac { #define to_acpi_ac(x) power_supply_get_drvdata(x) -#ifdef CONFIG_ACPI_PROCFS_POWER -static const struct file_operations acpi_ac_fops = { - .owner = THIS_MODULE, - .open = acpi_ac_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - /* -------------------------------------------------------------------------- AC Adapter Management -------------------------------------------------------------------------- */ @@ -209,11 +198,6 @@ static int acpi_ac_seq_show(struct seq_file *seq, void *offset) return 0; } -static int acpi_ac_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, acpi_ac_seq_show, PDE_DATA(inode)); -} - static int acpi_ac_add_fs(struct acpi_ac *ac) { struct proc_dir_entry *entry = NULL; @@ -228,9 +212,8 @@ static int acpi_ac_add_fs(struct acpi_ac *ac) } /* 'state' [R] */ - entry = proc_create_data(ACPI_AC_FILE_STATE, - S_IRUGO, acpi_device_dir(ac->device), - &acpi_ac_fops, ac); + entry = proc_create_single_data(ACPI_AC_FILE_STATE, S_IRUGO, + acpi_device_dir(ac->device), acpi_ac_seq_show, ac); if (!entry) return -ENODEV; return 0; diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index bdb24d636d9a..76550689ce10 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -81,14 +81,6 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); #ifdef CONFIG_ACPI_PROCFS_POWER extern struct proc_dir_entry *acpi_lock_battery_dir(void); extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir); - -enum acpi_battery_files { - info_tag = 0, - state_tag, - alarm_tag, - ACPI_BATTERY_NUMFILES, -}; - #endif static const struct acpi_device_id battery_device_ids[] = { @@ -985,9 +977,10 @@ static const char *acpi_battery_units(const struct acpi_battery *battery) "mA" : "mW"; } -static int acpi_battery_print_info(struct seq_file *seq, int result) +static int acpi_battery_info_proc_show(struct seq_file *seq, void *offset) { struct acpi_battery *battery = seq->private; + int result = acpi_battery_update(battery, false); if (result) goto end; @@ -1041,9 +1034,10 @@ static int acpi_battery_print_info(struct seq_file *seq, int result) return result; } -static int acpi_battery_print_state(struct seq_file *seq, int result) +static int acpi_battery_state_proc_show(struct seq_file *seq, void *offset) { struct acpi_battery *battery = seq->private; + int result = acpi_battery_update(battery, false); if (result) goto end; @@ -1088,9 +1082,10 @@ static int acpi_battery_print_state(struct seq_file *seq, int result) return result; } -static int acpi_battery_print_alarm(struct seq_file *seq, int result) +static int acpi_battery_alarm_proc_show(struct seq_file *seq, void *offset) { struct acpi_battery *battery = seq->private; + int result = acpi_battery_update(battery, false); if (result) goto end; @@ -1142,82 +1137,22 @@ static ssize_t acpi_battery_write_alarm(struct file *file, return result; } -typedef int(*print_func)(struct seq_file *seq, int result); - -static print_func acpi_print_funcs[ACPI_BATTERY_NUMFILES] = { - acpi_battery_print_info, - acpi_battery_print_state, - acpi_battery_print_alarm, -}; - -static int acpi_battery_read(int fid, struct seq_file *seq) +static int acpi_battery_alarm_proc_open(struct inode *inode, struct file *file) { - struct acpi_battery *battery = seq->private; - int result = acpi_battery_update(battery, false); - return acpi_print_funcs[fid](seq, result); + return single_open(file, acpi_battery_alarm_proc_show, PDE_DATA(inode)); } -#define DECLARE_FILE_FUNCTIONS(_name) \ -static int acpi_battery_read_##_name(struct seq_file *seq, void *offset) \ -{ \ - return acpi_battery_read(_name##_tag, seq); \ -} \ -static int acpi_battery_##_name##_open_fs(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, acpi_battery_read_##_name, PDE_DATA(inode)); \ -} - -DECLARE_FILE_FUNCTIONS(info); -DECLARE_FILE_FUNCTIONS(state); -DECLARE_FILE_FUNCTIONS(alarm); - -#undef DECLARE_FILE_FUNCTIONS - -#define FILE_DESCRIPTION_RO(_name) \ - { \ - .name = __stringify(_name), \ - .mode = S_IRUGO, \ - .ops = { \ - .open = acpi_battery_##_name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - .owner = THIS_MODULE, \ - }, \ - } - -#define FILE_DESCRIPTION_RW(_name) \ - { \ - .name = __stringify(_name), \ - .mode = S_IFREG | S_IRUGO | S_IWUSR, \ - .ops = { \ - .open = acpi_battery_##_name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .write = acpi_battery_write_##_name, \ - .release = single_release, \ - .owner = THIS_MODULE, \ - }, \ - } - -static const struct battery_file { - struct file_operations ops; - umode_t mode; - const char *name; -} acpi_battery_file[] = { - FILE_DESCRIPTION_RO(info), - FILE_DESCRIPTION_RO(state), - FILE_DESCRIPTION_RW(alarm), +static const struct file_operations acpi_battery_alarm_fops = { + .owner = THIS_MODULE, + .open = acpi_battery_alarm_proc_open, + .read = seq_read, + .write = acpi_battery_write_alarm, + .llseek = seq_lseek, + .release = single_release, }; -#undef FILE_DESCRIPTION_RO -#undef FILE_DESCRIPTION_RW - static int acpi_battery_add_fs(struct acpi_device *device) { - struct proc_dir_entry *entry = NULL; - int i; - printk(KERN_WARNING PREFIX "Deprecated procfs I/F for battery is loaded," " please retry with CONFIG_ACPI_PROCFS_POWER cleared\n"); if (!acpi_device_dir(device)) { @@ -1227,28 +1162,24 @@ static int acpi_battery_add_fs(struct acpi_device *device) return -ENODEV; } - for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) { - entry = proc_create_data(acpi_battery_file[i].name, - acpi_battery_file[i].mode, - acpi_device_dir(device), - &acpi_battery_file[i].ops, - acpi_driver_data(device)); - if (!entry) - return -ENODEV; - } + if (!proc_create_single_data("info", S_IRUGO, acpi_device_dir(device), + acpi_battery_info_proc_show, acpi_driver_data(device))) + return -ENODEV; + if (!proc_create_single_data("state", S_IRUGO, acpi_device_dir(device), + acpi_battery_state_proc_show, acpi_driver_data(device))) + return -ENODEV; + if (!proc_create_data("alarm", S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), &acpi_battery_alarm_fops, + acpi_driver_data(device))) + return -ENODEV; return 0; } static void acpi_battery_remove_fs(struct acpi_device *device) { - int i; if (!acpi_device_dir(device)) return; - for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) - remove_proc_entry(acpi_battery_file[i].name, - acpi_device_dir(device)); - - remove_proc_entry(acpi_device_bid(device), acpi_battery_dir); + remove_proc_subtree(acpi_device_bid(device), acpi_battery_dir); acpi_device_dir(device) = NULL; } diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index f1cc4f9d31cd..2345a5ee2dbb 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -263,19 +263,6 @@ static int acpi_button_state_seq_show(struct seq_file *seq, void *offset) return 0; } -static int acpi_button_state_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, acpi_button_state_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations acpi_button_state_fops = { - .owner = THIS_MODULE, - .open = acpi_button_state_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int acpi_button_add_fs(struct acpi_device *device) { struct acpi_button *button = acpi_driver_data(device); @@ -311,9 +298,9 @@ static int acpi_button_add_fs(struct acpi_device *device) } /* create /proc/acpi/button/lid/LID/state */ - entry = proc_create_data(ACPI_BUTTON_FILE_STATE, - S_IRUGO, acpi_device_dir(device), - &acpi_button_state_fops, device); + entry = proc_create_single_data(ACPI_BUTTON_FILE_STATE, S_IRUGO, + acpi_device_dir(device), acpi_button_state_seq_show, + device); if (!entry) { ret = -ENODEV; goto remove_dev_dir; diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 4a3ac31c07d0..3b0118786b43 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -20,6 +20,7 @@ #include <linux/sizes.h> #include <linux/limits.h> #include <linux/clk/clk-conf.h> +#include <linux/platform_device.h> #include <asm/irq.h> @@ -193,14 +194,16 @@ static const struct dev_pm_ops amba_pm = { /* * Primecells are part of the Advanced Microcontroller Bus Architecture, * so we call the bus "amba". + * DMA configuration for platform and AMBA bus is same. So here we reuse + * platform's DMA config routine. */ struct bus_type amba_bustype = { .name = "amba", .dev_groups = amba_dev_groups, .match = amba_match, .uevent = amba_uevent, + .dma_configure = platform_dma_configure, .pm = &amba_pm, - .force_dma = true, }; static int __init amba_init(void) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6389c88b3500..738fb22978dd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -334,6 +334,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c07), board_ahci_mobile }, /* Lynx LP RAID */ { PCI_VDEVICE(INTEL, 0x9c0e), board_ahci_mobile }, /* Lynx LP RAID */ { PCI_VDEVICE(INTEL, 0x9c0f), board_ahci_mobile }, /* Lynx LP RAID */ + { PCI_VDEVICE(INTEL, 0x9dd3), board_ahci_mobile }, /* Cannon Lake PCH-LP AHCI */ { PCI_VDEVICE(INTEL, 0x1f22), board_ahci }, /* Avoton AHCI */ { PCI_VDEVICE(INTEL, 0x1f23), board_ahci }, /* Avoton AHCI */ { PCI_VDEVICE(INTEL, 0x1f24), board_ahci }, /* Avoton RAID */ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 68596bd4cf06..346b163f6e89 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4493,6 +4493,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */ { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ, }, + /* Some Sandisk SSDs lock up hard with NCQ enabled. Reported on + SD7SN6S256G and SD8SN8U256G */ + { "SanDisk SD[78]SN*G", NULL, ATA_HORKAGE_NONCQ, }, + /* devices which puke on READ_NATIVE_MAX */ { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, }, { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA }, @@ -4549,13 +4553,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, - /* This specific Samsung model/firmware-rev does not handle LPM well */ + /* These specific Samsung models/firmware-revs do not handle LPM well */ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, + { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, /* Sandisk devices which are known to not handle LPM well */ { "SanDisk SD7UB3Q*G1001", NULL, ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ + { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 513b260bcff1..a2398e28c295 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -500,57 +500,6 @@ void ata_eh_release(struct ata_port *ap) mutex_unlock(&ap->host->eh_mutex); } -/** - * ata_scsi_timed_out - SCSI layer time out callback - * @cmd: timed out SCSI command - * - * Handles SCSI layer timeout. We race with normal completion of - * the qc for @cmd. If the qc is already gone, we lose and let - * the scsi command finish (EH_HANDLED). Otherwise, the qc has - * timed out and EH should be invoked. Prevent ata_qc_complete() - * from finishing it by setting EH_SCHEDULED and return - * EH_NOT_HANDLED. - * - * TODO: kill this function once old EH is gone. - * - * LOCKING: - * Called from timer context - * - * RETURNS: - * EH_HANDLED or EH_NOT_HANDLED - */ -enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) -{ - struct Scsi_Host *host = cmd->device->host; - struct ata_port *ap = ata_shost_to_port(host); - unsigned long flags; - struct ata_queued_cmd *qc; - enum blk_eh_timer_return ret; - - DPRINTK("ENTER\n"); - - if (ap->ops->error_handler) { - ret = BLK_EH_NOT_HANDLED; - goto out; - } - - ret = BLK_EH_HANDLED; - spin_lock_irqsave(ap->lock, flags); - qc = ata_qc_from_tag(ap, ap->link.active_tag); - if (qc) { - WARN_ON(qc->scsicmd != cmd); - qc->flags |= ATA_QCFLAG_EH_SCHEDULED; - qc->err_mask |= AC_ERR_TIMEOUT; - ret = BLK_EH_NOT_HANDLED; - } - spin_unlock_irqrestore(ap->lock, flags); - - out: - DPRINTK("EXIT, ret=%d\n", ret); - return ret; -} -EXPORT_SYMBOL(ata_scsi_timed_out); - static void ata_eh_unload(struct ata_port *ap) { struct ata_link *link; diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 9c9a22958717..a8d2eb0ceb8d 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1151,8 +1151,8 @@ static void eprom_get_byte(struct zatm_dev *zatm_dev, unsigned char *byte, } -static unsigned char eprom_try_esi(struct atm_dev *dev, unsigned short cmd, - int offset, int swap) +static int eprom_try_esi(struct atm_dev *dev, unsigned short cmd, int offset, + int swap) { unsigned char buf[ZEPROM_SIZE]; struct zatm_dev *zatm_dev; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2da998baa75c..30cc9c877ebb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -534,14 +534,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, + &dev_attr_spec_store_bypass.attr, NULL }; diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index d82566d6e237..f831a582209c 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -329,36 +329,13 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) #endif /* - * Common configuration to enable DMA API use for a device + * enables DMA API use for a device */ -#include <linux/pci.h> - int dma_configure(struct device *dev) { - struct device *bridge = NULL, *dma_dev = dev; - enum dev_dma_attr attr; - int ret = 0; - - if (dev_is_pci(dev)) { - bridge = pci_get_host_bridge_device(to_pci_dev(dev)); - dma_dev = bridge; - if (IS_ENABLED(CONFIG_OF) && dma_dev->parent && - dma_dev->parent->of_node) - dma_dev = dma_dev->parent; - } - - if (dma_dev->of_node) { - ret = of_dma_configure(dev, dma_dev->of_node); - } else if (has_acpi_companion(dma_dev)) { - attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode)); - if (attr != DEV_DMA_NOT_SUPPORTED) - ret = acpi_dma_configure(dev, attr); - } - - if (bridge) - pci_put_host_bridge_device(bridge); - - return ret; + if (dev->bus->dma_configure) + return dev->bus->dma_configure(dev); + return 0; } void dma_deconfigure(struct device *dev) diff --git a/drivers/base/node.c b/drivers/base/node.c index 7a3a580821e0..a5e821d09656 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -490,7 +490,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, return 0; } -int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) +int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages, + bool check_nid) { unsigned long end_pfn = start_pfn + nr_pages; unsigned long pfn; @@ -514,7 +515,7 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) mem_blk = find_memory_block_hinted(mem_sect, mem_blk); - ret = register_mem_sect_under_node(mem_blk, nid, true); + ret = register_mem_sect_under_node(mem_blk, nid, check_nid); if (!err) err = ret; diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 8075ddc70a17..c0ff1e73a634 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1130,6 +1130,22 @@ int platform_pm_restore(struct device *dev) #endif /* CONFIG_HIBERNATE_CALLBACKS */ +int platform_dma_configure(struct device *dev) +{ + enum dev_dma_attr attr; + int ret = 0; + + if (dev->of_node) { + ret = of_dma_configure(dev, dev->of_node, true); + } else if (has_acpi_companion(dev)) { + attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); + if (attr != DEV_DMA_NOT_SUPPORTED) + ret = acpi_dma_configure(dev, attr); + } + + return ret; +} + static const struct dev_pm_ops platform_dev_pm_ops = { .runtime_suspend = pm_generic_runtime_suspend, .runtime_resume = pm_generic_runtime_resume, @@ -1141,8 +1157,8 @@ struct bus_type platform_bus_type = { .dev_groups = platform_dev_groups, .match = platform_match, .uevent = platform_uevent, + .dma_configure = platform_dma_configure, .pm = &platform_dev_pm_ops, - .force_dma = true, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 02a497e7c785..e5e067091572 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1923,10 +1923,8 @@ static int device_prepare(struct device *dev, pm_message_t state) dev->power.wakeup_path = false; - if (dev->power.no_pm_callbacks) { - ret = 1; /* Let device go direct_complete */ + if (dev->power.no_pm_callbacks) goto unlock; - } if (dev->pm_domain) callback = dev->pm_domain->ops.prepare; @@ -1960,7 +1958,8 @@ unlock: */ spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && - pm_runtime_suspended(dev) && ret > 0 && + ((pm_runtime_suspended(dev) && ret > 0) || + dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NEVER_SKIP); spin_unlock_irq(&dev->power.lock); return 0; diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index f040aba48d50..27e9686b6d3a 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -184,7 +184,7 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq) { int i; static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"}; - char interrupts[20]; + char interrupts[25]; char *ints = interrupts; for (i = 0; i < ARRAY_SIZE(irq_name); i++) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index e6986c7608f1..fc1f4acdd189 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -207,7 +207,7 @@ static void bcma_of_fill_device(struct device *parent, core->irq = bcma_of_get_irq(parent, core, 0); - of_dma_configure(&core->dev, node); + of_dma_configure(&core->dev, node, false); } unsigned int bcma_core_irq(struct bcma_device *core, int num) diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index f781eff7d23e..6ca77d6047d6 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1179,7 +1179,6 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) return DAC960_Failure(Controller, "DMA mask out of range"); - Controller->BounceBufferLimit = DMA_BIT_MASK(32); if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) { CommandMailboxesSize = 0; @@ -1380,11 +1379,8 @@ static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T dma_addr_t CommandMailboxDMA; DAC960_V2_CommandStatus_T CommandStatus; - if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64))) - Controller->BounceBufferLimit = DMA_BIT_MASK(64); - else if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - Controller->BounceBufferLimit = DMA_BIT_MASK(32); - else + if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) && + pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) return DAC960_Failure(Controller, "DMA mask out of range"); /* This is a temporary dma mapping, used only in the scope of this function */ @@ -2540,7 +2536,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) continue; } Controller->RequestQueue[n] = RequestQueue; - blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit); RequestQueue->queuedata = Controller; blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); @@ -6451,19 +6446,6 @@ static int dac960_proc_show(struct seq_file *m, void *v) return 0; } -static int dac960_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, dac960_proc_show, NULL); -} - -static const struct file_operations dac960_proc_fops = { - .owner = THIS_MODULE, - .open = dac960_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int dac960_initial_status_proc_show(struct seq_file *m, void *v) { DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; @@ -6471,19 +6453,6 @@ static int dac960_initial_status_proc_show(struct seq_file *m, void *v) return 0; } -static int dac960_initial_status_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, dac960_initial_status_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations dac960_initial_status_proc_fops = { - .owner = THIS_MODULE, - .open = dac960_initial_status_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int dac960_current_status_proc_show(struct seq_file *m, void *v) { DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private; @@ -6517,19 +6486,6 @@ static int dac960_current_status_proc_show(struct seq_file *m, void *v) return 0; } -static int dac960_current_status_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, dac960_current_status_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations dac960_current_status_proc_fops = { - .owner = THIS_MODULE, - .open = dac960_current_status_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int dac960_user_command_proc_show(struct seq_file *m, void *v) { DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; @@ -6584,17 +6540,19 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller) if (DAC960_ProcDirectoryEntry == NULL) { DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL); - proc_create("status", 0, DAC960_ProcDirectoryEntry, - &dac960_proc_fops); + proc_create_single("status", 0, DAC960_ProcDirectoryEntry, + dac960_proc_show); } snprintf(Controller->ControllerName, sizeof(Controller->ControllerName), "c%d", Controller->ControllerNumber); ControllerProcEntry = proc_mkdir(Controller->ControllerName, DAC960_ProcDirectoryEntry); - proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller); - proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller); - proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller); + proc_create_single_data("initial_status", 0, ControllerProcEntry, + dac960_initial_status_proc_show, Controller); + proc_create_single_data("current_status", 0, ControllerProcEntry, + dac960_current_status_proc_show, Controller); + proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller); Controller->ControllerProcEntry = ControllerProcEntry; } diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h index 21aff470d268..1439e651928b 100644 --- a/drivers/block/DAC960.h +++ b/drivers/block/DAC960.h @@ -2295,7 +2295,6 @@ typedef struct DAC960_Controller unsigned short MaxBlocksPerCommand; unsigned short ControllerScatterGatherLimit; unsigned short DriverScatterGatherLimit; - u64 BounceBufferLimit; unsigned int CombinedStatusBufferLength; unsigned int InitialStatusLength; unsigned int CurrentStatusLength; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 6797e6c23c8a..429ebb84b592 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -159,14 +159,14 @@ static int aoe_debugfs_open(struct inode *inode, struct file *file) return single_open(file, aoedisk_debugfs_show, inode->i_private); } -static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); -static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); -static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL); +static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL); +static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL); +static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL); static struct device_attribute dev_attr_firmware_version = { - .attr = { .name = "firmware-version", .mode = S_IRUGO }, + .attr = { .name = "firmware-version", .mode = 0444 }, .show = aoedisk_show_fwver, }; -static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL); +static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL); static struct attribute *aoe_attrs[] = { &dev_attr_state.attr, @@ -388,7 +388,6 @@ aoeblk_gdalloc(void *vp) d->aoemajor, d->aoeminor); goto err_mempool; } - blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); spin_lock_irqsave(&d->lock, flags); WARN_ON(!(d->flags & DEVFL_GD_NOW)); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 540bb60cd071..096882e54095 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1032,8 +1032,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt) iter.bi_size = cnt; __bio_for_each_segment(bv, bio, iter, iter) { - char *p = page_address(bv.bv_page) + bv.bv_offset; + char *p = kmap_atomic(bv.bv_page) + bv.bv_offset; skb_copy_bits(skb, soff, p, bv.bv_len); + kunmap_atomic(p); soff += bv.bv_len; } } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 66cb0f857f64..bb976598ee43 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -331,15 +331,15 @@ static const struct block_device_operations brd_fops = { * And now the modules code and kernel interface. */ static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; -module_param(rd_nr, int, S_IRUGO); +module_param(rd_nr, int, 0444); MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; -module_param(rd_size, ulong, S_IRUGO); +module_param(rd_size, ulong, 0444); MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); static int max_part = 1; -module_param(max_part, int, S_IRUGO); +module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); MODULE_LICENSE("GPL"); @@ -402,6 +402,10 @@ static struct brd_device *brd_alloc(int i) set_capacity(disk, rd_size * 2); disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; + /* Tell the block layer that this is not a rotational device */ + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + return brd; out_free_queue: diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 9f4e6f502b84..11a85b740327 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -977,7 +977,7 @@ static void drbd_bm_endio(struct bio *bio) bm_page_unlock_io(device, idx); if (ctx->flags & BM_AIO_COPY_PAGES) - mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); + mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool); bio_put(bio); @@ -1014,7 +1014,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM); + page = mempool_alloc(&drbd_md_io_page_pool, + GFP_NOIO | __GFP_HIGHMEM); copy_highpage(page, b->bm_pages[page_nr]); bm_store_page_idx(page, page_nr); } else diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index ab21976a87b2..5d5e8d6a8a56 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -481,9 +481,9 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource) goto fail; resource->debugfs_res_connections = dentry; - dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP, - resource->debugfs_res, resource, - &in_flight_summary_fops); + dentry = debugfs_create_file("in_flight_summary", 0440, + resource->debugfs_res, resource, + &in_flight_summary_fops); if (IS_ERR_OR_NULL(dentry)) goto fail; resource->debugfs_res_in_flight_summary = dentry; @@ -645,16 +645,16 @@ void drbd_debugfs_connection_add(struct drbd_connection *connection) goto fail; connection->debugfs_conn = dentry; - dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP, - connection->debugfs_conn, connection, - &connection_callback_history_fops); + dentry = debugfs_create_file("callback_history", 0440, + connection->debugfs_conn, connection, + &connection_callback_history_fops); if (IS_ERR_OR_NULL(dentry)) goto fail; connection->debugfs_conn_callback_history = dentry; - dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP, - connection->debugfs_conn, connection, - &connection_oldest_requests_fops); + dentry = debugfs_create_file("oldest_requests", 0440, + connection->debugfs_conn, connection, + &connection_oldest_requests_fops); if (IS_ERR_OR_NULL(dentry)) goto fail; connection->debugfs_conn_oldest_requests = dentry; @@ -824,7 +824,7 @@ void drbd_debugfs_device_add(struct drbd_device *device) device->debugfs_minor = dentry; #define DCF(name) do { \ - dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \ + dentry = debugfs_create_file(#name, 0440, \ device->debugfs_vol, device, \ &device_ ## name ## _fops); \ if (IS_ERR_OR_NULL(dentry)) \ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 06ecee1b528e..bc4ed2ed40a2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1405,8 +1405,8 @@ extern struct kmem_cache *drbd_request_cache; extern struct kmem_cache *drbd_ee_cache; /* peer requests */ extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ -extern mempool_t *drbd_request_mempool; -extern mempool_t *drbd_ee_mempool; +extern mempool_t drbd_request_mempool; +extern mempool_t drbd_ee_mempool; /* drbd's page pool, used to buffer data received from the peer, * or data requested by the peer. @@ -1432,16 +1432,16 @@ extern wait_queue_head_t drbd_pp_wait; * 128 should be plenty, currently we probably can get away with as few as 1. */ #define DRBD_MIN_POOL_PAGES 128 -extern mempool_t *drbd_md_io_page_pool; +extern mempool_t drbd_md_io_page_pool; /* We also need to make sure we get a bio * when we need it for housekeeping purposes */ -extern struct bio_set *drbd_md_io_bio_set; +extern struct bio_set drbd_md_io_bio_set; /* to allocate from that set */ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); /* And a bio_set for cloning */ -extern struct bio_set *drbd_io_bio_set; +extern struct bio_set drbd_io_bio_set; extern struct mutex resources_mutex; @@ -1643,7 +1643,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; -extern const struct file_operations drbd_proc_fops; +int drbd_seq_show(struct seq_file *seq, void *v); /* drbd_actlog.c */ extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 185f1ef00a7c..7655d6133139 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -124,11 +124,11 @@ struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* peer requests */ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ -mempool_t *drbd_request_mempool; -mempool_t *drbd_ee_mempool; -mempool_t *drbd_md_io_page_pool; -struct bio_set *drbd_md_io_bio_set; -struct bio_set *drbd_io_bio_set; +mempool_t drbd_request_mempool; +mempool_t drbd_ee_mempool; +mempool_t drbd_md_io_page_pool; +struct bio_set drbd_md_io_bio_set; +struct bio_set drbd_io_bio_set; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -153,10 +153,10 @@ struct bio *bio_alloc_drbd(gfp_t gfp_mask) { struct bio *bio; - if (!drbd_md_io_bio_set) + if (!bioset_initialized(&drbd_md_io_bio_set)) return bio_alloc(gfp_mask, 1); - bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set); if (!bio) return NULL; return bio; @@ -2097,16 +2097,11 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ - if (drbd_io_bio_set) - bioset_free(drbd_io_bio_set); - if (drbd_md_io_bio_set) - bioset_free(drbd_md_io_bio_set); - if (drbd_md_io_page_pool) - mempool_destroy(drbd_md_io_page_pool); - if (drbd_ee_mempool) - mempool_destroy(drbd_ee_mempool); - if (drbd_request_mempool) - mempool_destroy(drbd_request_mempool); + bioset_exit(&drbd_io_bio_set); + bioset_exit(&drbd_md_io_bio_set); + mempool_exit(&drbd_md_io_page_pool); + mempool_exit(&drbd_ee_mempool); + mempool_exit(&drbd_request_mempool); if (drbd_ee_cache) kmem_cache_destroy(drbd_ee_cache); if (drbd_request_cache) @@ -2116,11 +2111,6 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); - drbd_io_bio_set = NULL; - drbd_md_io_bio_set = NULL; - drbd_md_io_page_pool = NULL; - drbd_ee_mempool = NULL; - drbd_request_mempool = NULL; drbd_ee_cache = NULL; drbd_request_cache = NULL; drbd_bm_ext_cache = NULL; @@ -2133,18 +2123,7 @@ static int drbd_create_mempools(void) { struct page *page; const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; - int i; - - /* prepare our caches and mempools */ - drbd_request_mempool = NULL; - drbd_ee_cache = NULL; - drbd_request_cache = NULL; - drbd_bm_ext_cache = NULL; - drbd_al_ext_cache = NULL; - drbd_pp_pool = NULL; - drbd_md_io_page_pool = NULL; - drbd_md_io_bio_set = NULL; - drbd_io_bio_set = NULL; + int i, ret; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2168,26 +2147,26 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ - drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0); - if (drbd_io_bio_set == NULL) + ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) goto Enomem; - drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, - BIOSET_NEED_BVECS); - if (drbd_md_io_bio_set == NULL) + ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0, + BIOSET_NEED_BVECS); + if (ret) goto Enomem; - drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); - if (drbd_md_io_page_pool == NULL) + ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0); + if (ret) goto Enomem; - drbd_request_mempool = mempool_create_slab_pool(number, - drbd_request_cache); - if (drbd_request_mempool == NULL) + ret = mempool_init_slab_pool(&drbd_request_mempool, number, + drbd_request_cache); + if (ret) goto Enomem; - drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache); - if (drbd_ee_mempool == NULL) + ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache); + if (ret) goto Enomem; /* drbd's page pool */ @@ -3010,7 +2989,7 @@ static int __init drbd_init(void) goto fail; err = -ENOMEM; - drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); + drbd_proc = proc_create_single("drbd", S_IFREG | 0444 , NULL, drbd_seq_show); if (!drbd_proc) { pr_err("unable to register proc file\n"); goto fail; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 582caeb0de86..74ef29247bb5 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -33,18 +33,7 @@ #include <linux/drbd.h> #include "drbd_int.h" -static int drbd_proc_open(struct inode *inode, struct file *file); -static int drbd_proc_release(struct inode *inode, struct file *file); - - struct proc_dir_entry *drbd_proc; -const struct file_operations drbd_proc_fops = { - .owner = THIS_MODULE, - .open = drbd_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = drbd_proc_release, -}; static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) { @@ -235,7 +224,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se } } -static int drbd_seq_show(struct seq_file *seq, void *v) +int drbd_seq_show(struct seq_file *seq, void *v) { int i, prev_i = -1; const char *sn; @@ -345,24 +334,3 @@ static int drbd_seq_show(struct seq_file *seq, void *v) return 0; } - -static int drbd_proc_open(struct inode *inode, struct file *file) -{ - int err; - - if (try_module_get(THIS_MODULE)) { - err = single_open(file, drbd_seq_show, NULL); - if (err) - module_put(THIS_MODULE); - return err; - } - return -ENODEV; -} - -static int drbd_proc_release(struct inode *inode, struct file *file) -{ - module_put(THIS_MODULE); - return single_release(inode, file); -} - -/* PROC FS stuff end */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c72dee0ef083..be9450f5ad1c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -378,7 +378,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) return NULL; - peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); + peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); if (!peer_req) { if (!(gfp_mask & __GFP_NOWARN)) drbd_err(device, "%s: allocation failed\n", __func__); @@ -409,7 +409,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto return peer_req; fail: - mempool_free(peer_req, drbd_ee_mempool); + mempool_free(peer_req, &drbd_ee_mempool); return NULL; } @@ -426,7 +426,7 @@ void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request * peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; drbd_al_complete_io(device, &peer_req->i); } - mempool_free(peer_req, drbd_ee_mempool); + mempool_free(peer_req, &drbd_ee_mempool); } int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index a500e738d929..a47e4987ee46 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -55,7 +55,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio { struct drbd_request *req; - req = mempool_alloc(drbd_request_mempool, GFP_NOIO); + req = mempool_alloc(&drbd_request_mempool, GFP_NOIO); if (!req) return NULL; memset(req, 0, sizeof(*req)); @@ -184,7 +184,7 @@ void drbd_req_destroy(struct kref *kref) } } - mempool_free(req, drbd_request_mempool); + mempool_free(req, &drbd_request_mempool); } static void wake_all_senders(struct drbd_connection *connection) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index cb97b3b30962..94c654020f0f 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -269,7 +269,7 @@ enum drbd_req_state_bits { static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { struct bio *bio; - bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set); + bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set); req->private_bio = bio; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8ec7235fc93b..8871b5044d9e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4450,7 +4450,7 @@ static ssize_t floppy_cmos_show(struct device *dev, return sprintf(buf, "%X\n", UDP->cmos); } -static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL); +static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL); static struct attribute *floppy_dev_attrs[] = { &dev_attr_cmos.attr, diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5d4e31655d96..4838b0dbaad3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -732,7 +732,7 @@ static ssize_t loop_attr_do_show_##_name(struct device *d, \ return loop_attr_show(d, b, loop_attr_##_name##_show); \ } \ static struct device_attribute loop_attr_##_name = \ - __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL); + __ATTR(_name, 0444, loop_attr_do_show_##_name, NULL); static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) { @@ -809,16 +809,17 @@ static struct attribute_group loop_attribute_group = { .attrs= loop_attrs, }; -static int loop_sysfs_init(struct loop_device *lo) +static void loop_sysfs_init(struct loop_device *lo) { - return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, - &loop_attribute_group); + lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, + &loop_attribute_group); } static void loop_sysfs_exit(struct loop_device *lo) { - sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, - &loop_attribute_group); + if (lo->sysfs_inited) + sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, + &loop_attribute_group); } static void loop_config_discard(struct loop_device *lo) @@ -1068,6 +1069,7 @@ static int loop_clr_fd(struct loop_device *lo) if (bdev) { bdput(bdev); invalidate_bdev(bdev); + bdev->bd_inode->i_mapping->wb_err = 0; } set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); @@ -1676,9 +1678,9 @@ static const struct block_device_operations lo_fops = { * And now the modules code and kernel interface. */ static int max_loop; -module_param(max_loop, int, S_IRUGO); +module_param(max_loop, int, 0444); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); -module_param(max_part, int, S_IRUGO); +module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index b78de9879f4f..4d42c7af7de7 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -58,6 +58,7 @@ struct loop_device { struct kthread_worker worker; struct task_struct *worker_task; bool use_dio; + bool sysfs_inited; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 769c551e3d71..c73626decb46 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2285,7 +2285,7 @@ static ssize_t mtip_hw_show_status(struct device *dev, return size; } -static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); +static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL); /* debugsfs entries */ @@ -2566,10 +2566,9 @@ static int mtip_hw_debugfs_init(struct driver_data *dd) return -1; } - debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd, - &mtip_flags_fops); - debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd, - &mtip_regs_fops); + debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops); + debugfs_create_file("registers", 0444, dd->dfs_node, dd, + &mtip_regs_fops); return 0; } @@ -2726,15 +2725,11 @@ static void mtip_softirq_done_fn(struct request *rq) blk_mq_end_request(rq, cmd->status); } -static void mtip_abort_cmd(struct request *req, void *data, - bool reserved) +static void mtip_abort_cmd(struct request *req, void *data, bool reserved) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; - if (!blk_mq_request_started(req)) - return; - dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); @@ -2742,14 +2737,10 @@ static void mtip_abort_cmd(struct request *req, void *data, mtip_softirq_done_fn(req); } -static void mtip_queue_cmd(struct request *req, void *data, - bool reserved) +static void mtip_queue_cmd(struct request *req, void *data, bool reserved) { struct driver_data *dd = data; - if (!blk_mq_request_started(req)) - return; - set_bit(req->tag, dd->port->cmds_to_issue); blk_abort_request(req); } @@ -3720,7 +3711,8 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_TIMEOUT; - return BLK_EH_HANDLED; + blk_mq_complete_request(req); + return BLK_EH_DONE; } if (test_bit(req->tag, dd->port->cmds_to_issue)) @@ -3862,7 +3854,6 @@ skip_create_disk: blk_queue_max_hw_sectors(dd->queue, 0xffff); blk_queue_max_segment_size(dd->queue, 0x400000); blk_queue_io_min(dd->queue, 4096); - blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask); /* Signal trim support */ if (dd->trim_supp == true) { @@ -4273,7 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, if (!dd->isr_workq) { dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); rv = -ENOMEM; - goto block_initialize_err; + goto setmask_err; } memset(cpu_list, 0, sizeof(cpu_list)); @@ -4614,7 +4605,7 @@ static int __init mtip_init(void) } if (dfs_parent) { dfs_device_status = debugfs_create_file("device_status", - S_IRUGO, dfs_parent, NULL, + 0444, dfs_parent, NULL, &mtip_device_status_fops); if (IS_ERR_OR_NULL(dfs_device_status)) { pr_err("Error creating device_status node\n"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index afbc202ca6fd..3ed1ef8ee528 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -166,16 +166,19 @@ static ssize_t pid_show(struct device *dev, } static const struct device_attribute pid_attr = { - .attr = { .name = "pid", .mode = S_IRUGO}, + .attr = { .name = "pid", .mode = 0444}, .show = pid_show, }; static void nbd_dev_remove(struct nbd_device *nbd) { struct gendisk *disk = nbd->disk; + struct request_queue *q; + if (disk) { + q = disk->queue; del_gendisk(disk); - blk_cleanup_queue(disk->queue); + blk_cleanup_queue(q); blk_mq_free_tag_set(&nbd->tag_set); disk->private_data = NULL; put_disk(disk); @@ -213,7 +216,15 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, } if (!nsock->dead) { kernel_sock_shutdown(nsock->sock, SHUT_RDWR); - atomic_dec(&nbd->config->live_connections); + if (atomic_dec_return(&nbd->config->live_connections) == 0) { + if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED, + &nbd->config->runtime_flags)) { + set_bit(NBD_DISCONNECTED, + &nbd->config->runtime_flags); + dev_info(nbd_to_dev(nbd), + "Disconnected due to user request.\n"); + } + } } nsock->dead = true; nsock->pending = NULL; @@ -231,9 +242,22 @@ static void nbd_size_clear(struct nbd_device *nbd) static void nbd_size_update(struct nbd_device *nbd) { struct nbd_config *config = nbd->config; + struct block_device *bdev = bdget_disk(nbd->disk, 0); + + if (config->flags & NBD_FLAG_SEND_TRIM) { + nbd->disk->queue->limits.discard_granularity = config->blksize; + blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); + } blk_queue_logical_block_size(nbd->disk->queue, config->blksize); blk_queue_physical_block_size(nbd->disk->queue, config->blksize); set_capacity(nbd->disk, config->bytesize >> 9); + if (bdev) { + if (bdev->bd_disk) + bd_set_size(bdev, config->bytesize); + else + bdev->bd_invalidated = 1; + bdput(bdev); + } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } @@ -243,6 +267,8 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, struct nbd_config *config = nbd->config; config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; + if (nbd->task_recv != NULL) + nbd_size_update(nbd); } static void nbd_complete_rq(struct request *req) @@ -286,13 +312,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; - return BLK_EH_HANDLED; + goto done; } config = nbd->config; if (config->num_connections > 1) { dev_err_ratelimited(nbd_to_dev(nbd), - "Connection timed out, retrying\n"); + "Connection timed out, retrying (%d/%d alive)\n", + atomic_read(&config->live_connections), + config->num_connections); /* * Hooray we have more connections, requeue this IO, the submit * path will put it on a real connection. @@ -314,7 +342,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, } blk_mq_requeue_request(req, true); nbd_config_put(nbd); - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; } } else { dev_err_ratelimited(nbd_to_dev(nbd), @@ -324,8 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, cmd->status = BLK_STS_IOERR; sock_shutdown(nbd); nbd_config_put(nbd); - - return BLK_EH_HANDLED; +done: + blk_mq_complete_request(req); + return BLK_EH_DONE; } /* @@ -647,11 +676,8 @@ static void recv_work(struct work_struct *work) static void nbd_clear_req(struct request *req, void *data, bool reserved) { - struct nbd_cmd *cmd; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - if (!blk_mq_request_started(req)) - return; - cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); } @@ -714,10 +740,9 @@ static int wait_for_reconnect(struct nbd_device *nbd) return 0; if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) return 0; - wait_event_timeout(config->conn_wait, - atomic_read(&config->live_connections), - config->dead_conn_timeout); - return atomic_read(&config->live_connections); + return wait_event_timeout(config->conn_wait, + atomic_read(&config->live_connections) > 0, + config->dead_conn_timeout) > 0; } static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) @@ -950,10 +975,6 @@ static void nbd_bdev_reset(struct block_device *bdev) if (bdev->bd_openers > 1) return; bd_set_size(bdev, 0); - if (max_part > 0) { - blkdev_reread_part(bdev); - bdev->bd_invalidated = 1; - } } static void nbd_parse_flags(struct nbd_device *nbd) @@ -1040,6 +1061,8 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; + nbd->disk->queue->limits.discard_granularity = 0; + blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); @@ -1109,7 +1132,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b if (ret) return ret; - bd_set_size(bdev, config->bytesize); if (max_part) bdev->bd_invalidated = 1; mutex_unlock(&nbd->config_lock); @@ -1118,7 +1140,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b if (ret) sock_shutdown(nbd); mutex_lock(&nbd->config_lock); - bd_set_size(bdev, 0); + nbd_bdev_reset(bdev); /* user requested, ignore socket errors */ if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) ret = 0; @@ -1269,6 +1291,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); + bdev->bd_invalidated = 1; + } else if (nbd_disconnected(nbd->config)) { + bdev->bd_invalidated = 1; } out: mutex_unlock(&nbd_index_mutex); @@ -1490,8 +1515,8 @@ static int nbd_dev_add(int index) */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); - disk->queue->limits.discard_granularity = 512; - blk_queue_max_discard_sectors(disk->queue, UINT_MAX); + disk->queue->limits.discard_granularity = 0; + blk_queue_max_discard_sectors(disk->queue, 0); blk_queue_max_segment_size(disk->queue, UINT_MAX); blk_queue_max_segments(disk->queue, USHRT_MAX); blk_queue_max_hw_sectors(disk->queue, 65536); @@ -1755,6 +1780,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) } mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); + nbd_clear_sock(nbd); mutex_unlock(&nbd->config_lock); if (test_and_clear_bit(NBD_HAS_CONFIG_REF, &nbd->config->runtime_flags)) @@ -2093,7 +2119,8 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; recv_workqueue = alloc_workqueue("knbd-recv", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI | + WQ_UNBOUND, 0); if (!recv_workqueue) return -ENOMEM; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index a76553293a31..2bdadd7f1454 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -157,23 +157,23 @@ enum { }; static int g_no_sched; -module_param_named(no_sched, g_no_sched, int, S_IRUGO); +module_param_named(no_sched, g_no_sched, int, 0444); MODULE_PARM_DESC(no_sched, "No io scheduler"); static int g_submit_queues = 1; -module_param_named(submit_queues, g_submit_queues, int, S_IRUGO); +module_param_named(submit_queues, g_submit_queues, int, 0444); MODULE_PARM_DESC(submit_queues, "Number of submission queues"); static int g_home_node = NUMA_NO_NODE; -module_param_named(home_node, g_home_node, int, S_IRUGO); +module_param_named(home_node, g_home_node, int, 0444); MODULE_PARM_DESC(home_node, "Home node for the device"); #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static char g_timeout_str[80]; -module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO); +module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); static char g_requeue_str[80]; -module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO); +module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); #endif static int g_queue_mode = NULL_Q_MQ; @@ -203,27 +203,27 @@ static const struct kernel_param_ops null_queue_mode_param_ops = { .get = param_get_int, }; -device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO); +device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444); MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); static int g_gb = 250; -module_param_named(gb, g_gb, int, S_IRUGO); +module_param_named(gb, g_gb, int, 0444); MODULE_PARM_DESC(gb, "Size in GB"); static int g_bs = 512; -module_param_named(bs, g_bs, int, S_IRUGO); +module_param_named(bs, g_bs, int, 0444); MODULE_PARM_DESC(bs, "Block size (in bytes)"); static int nr_devices = 1; -module_param(nr_devices, int, S_IRUGO); +module_param(nr_devices, int, 0444); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); static bool g_blocking; -module_param_named(blocking, g_blocking, bool, S_IRUGO); +module_param_named(blocking, g_blocking, bool, 0444); MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); static bool shared_tags; -module_param(shared_tags, bool, S_IRUGO); +module_param(shared_tags, bool, 0444); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); static int g_irqmode = NULL_IRQ_SOFTIRQ; @@ -239,19 +239,19 @@ static const struct kernel_param_ops null_irqmode_param_ops = { .get = param_get_int, }; -device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO); +device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444); MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); static unsigned long g_completion_nsec = 10000; -module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO); +module_param_named(completion_nsec, g_completion_nsec, ulong, 0444); MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); static int g_hw_queue_depth = 64; -module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO); +module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444); MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); static bool g_use_per_node_hctx; -module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO); +module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); static struct nullb_device *null_alloc_dev(void); @@ -1365,7 +1365,8 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) { pr_info("null: rq %p timed out\n", rq); - return BLK_EH_HANDLED; + blk_mq_complete_request(rq); + return BLK_EH_DONE; } static int null_rq_prep_fn(struct request_queue *q, struct request *req) @@ -1427,7 +1428,8 @@ static void null_request_fn(struct request_queue *q) static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) { pr_info("null: rq %p timed out\n", rq); - return BLK_EH_HANDLED; + blk_mq_complete_request(rq); + return BLK_EH_DONE; } static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 27a44b97393a..8961b190e256 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -740,7 +740,7 @@ static int pd_special_command(struct pd_unit *disk, { struct request *rq; - rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index c61d20c9f3f8..b3f83cd96f33 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -97,8 +97,8 @@ static int pktdev_major; static int write_congestion_on = PKT_WRITE_CONGESTION_ON; static int write_congestion_off = PKT_WRITE_CONGESTION_OFF; static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ -static mempool_t *psd_pool; -static struct bio_set *pkt_bio_set; +static mempool_t psd_pool; +static struct bio_set pkt_bio_set; static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */ static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */ @@ -478,8 +478,8 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd) if (!pd->dfs_d_root) return; - pd->dfs_f_info = debugfs_create_file("info", S_IRUGO, - pd->dfs_d_root, pd, &debug_fops); + pd->dfs_f_info = debugfs_create_file("info", 0444, + pd->dfs_d_root, pd, &debug_fops); } static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd) @@ -631,7 +631,7 @@ static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node) static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node) { rb_erase(&node->rb_node, &pd->bio_queue); - mempool_free(node, pd->rb_pool); + mempool_free(node, &pd->rb_pool); pd->bio_queue_size--; BUG_ON(pd->bio_queue_size < 0); } @@ -704,13 +704,13 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); if (cgc->buflen) { ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, - __GFP_RECLAIM); + GFP_NOIO); if (ret) goto out; } @@ -1285,7 +1285,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) * Fill-in bvec with data from orig_bios. */ spin_lock(&pkt->lock); - bio_copy_data(pkt->w_bio, pkt->orig_bios.head); + bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head); pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); spin_unlock(&pkt->lock); @@ -2303,14 +2303,14 @@ static void pkt_end_io_read_cloned(struct bio *bio) psd->bio->bi_status = bio->bi_status; bio_put(bio); bio_endio(psd->bio); - mempool_free(psd, psd_pool); + mempool_free(psd, &psd_pool); pkt_bio_finished(pd); } static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) { - struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set); - struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO); + struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set); + struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); psd->pd = pd; psd->bio = bio; @@ -2381,7 +2381,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) /* * No matching packet found. Store the bio in the work queue. */ - node = mempool_alloc(pd->rb_pool, GFP_NOIO); + node = mempool_alloc(&pd->rb_pool, GFP_NOIO); node->bio = bio; spin_lock(&pd->lock); BUG_ON(pd->bio_queue_size < 0); @@ -2451,7 +2451,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) split = bio_split(bio, last_zone - bio->bi_iter.bi_sector, - GFP_NOIO, pkt_bio_set); + GFP_NOIO, &pkt_bio_set); bio_chain(split, bio); } else { split = bio; @@ -2538,18 +2538,6 @@ static int pkt_seq_show(struct seq_file *m, void *p) return 0; } -static int pkt_seq_open(struct inode *inode, struct file *file) -{ - return single_open(file, pkt_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations pkt_proc_fops = { - .open = pkt_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release -}; - static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { int i; @@ -2604,7 +2592,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) goto out_mem; } - proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd); + proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd); pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b)); return 0; @@ -2707,9 +2695,9 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) if (!pd) goto out_mutex; - pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE, - sizeof(struct pkt_rb_node)); - if (!pd->rb_pool) + ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE, + sizeof(struct pkt_rb_node)); + if (ret) goto out_mem; INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); @@ -2766,7 +2754,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) out_mem2: put_disk(disk); out_mem: - mempool_destroy(pd->rb_pool); + mempool_exit(&pd->rb_pool); kfree(pd); out_mutex: mutex_unlock(&ctl_mutex); @@ -2817,7 +2805,7 @@ static int pkt_remove_dev(dev_t pkt_dev) blk_cleanup_queue(pd->disk->queue); put_disk(pd->disk); - mempool_destroy(pd->rb_pool); + mempool_exit(&pd->rb_pool); kfree(pd); /* This is safe: open() is still holding a reference. */ @@ -2914,14 +2902,14 @@ static int __init pkt_init(void) mutex_init(&ctl_mutex); - psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE, - sizeof(struct packet_stacked_data)); - if (!psd_pool) - return -ENOMEM; - pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!pkt_bio_set) { - mempool_destroy(psd_pool); - return -ENOMEM; + ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE, + sizeof(struct packet_stacked_data)); + if (ret) + return ret; + ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) { + mempool_exit(&psd_pool); + return ret; } ret = register_blkdev(pktdev_major, DRIVER_NAME); @@ -2954,8 +2942,8 @@ out_misc: out: unregister_blkdev(pktdev_major, DRIVER_NAME); out2: - mempool_destroy(psd_pool); - bioset_free(pkt_bio_set); + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); return ret; } @@ -2968,8 +2956,8 @@ static void __exit pkt_exit(void) pkt_sysfs_cleanup(); unregister_blkdev(pktdev_major, DRIVER_NAME); - mempool_destroy(psd_pool); - bioset_free(pkt_bio_set); + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); } MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 075662f2cf46..afe1508d82c6 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -465,8 +465,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) priv->queue = queue; queue->queuedata = dev; - blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH); - blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); blk_queue_segment_boundary(queue, -1UL); blk_queue_dma_alignment(queue, dev->blk_size-1); diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 6a55959cbf78..8fa4533a1249 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -521,26 +521,13 @@ static int ps3vram_proc_show(struct seq_file *m, void *v) return 0; } -static int ps3vram_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ps3vram_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ps3vram_proc_fops = { - .owner = THIS_MODULE, - .open = ps3vram_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void ps3vram_proc_init(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); struct proc_dir_entry *pde; - pde = proc_create_data(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops, - priv); + pde = proc_create_single_data(DEVICE_NAME, 0444, NULL, + ps3vram_proc_show, priv); if (!pde) dev_warn(&dev->core, "failed to create /proc entry\n"); } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 33b36fea1d73..af354047ac4b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -424,7 +424,7 @@ static struct workqueue_struct *rbd_wq; * single-major requires >= 0.75 version of userspace rbd utility. */ static bool single_major = true; -module_param(single_major, bool, S_IRUGO); +module_param(single_major, bool, 0444); MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)"); static ssize_t rbd_add(struct bus_type *bus, const char *buf, @@ -468,11 +468,11 @@ static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf) return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); } -static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); -static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); -static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); -static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); -static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL); +static BUS_ATTR(add, 0200, NULL, rbd_add); +static BUS_ATTR(remove, 0200, NULL, rbd_remove); +static BUS_ATTR(add_single_major, 0200, NULL, rbd_add_single_major); +static BUS_ATTR(remove_single_major, 0200, NULL, rbd_remove_single_major); +static BUS_ATTR(supported_features, 0444, rbd_supported_features_show, NULL); static struct attribute *rbd_bus_attrs[] = { &bus_attr_add.attr, @@ -4204,22 +4204,22 @@ static ssize_t rbd_image_refresh(struct device *dev, return size; } -static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); -static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL); -static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); -static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL); -static DEVICE_ATTR(client_addr, S_IRUGO, rbd_client_addr_show, NULL); -static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); -static DEVICE_ATTR(cluster_fsid, S_IRUGO, rbd_cluster_fsid_show, NULL); -static DEVICE_ATTR(config_info, S_IRUSR, rbd_config_info_show, NULL); -static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); -static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); -static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); -static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); -static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); -static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); -static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); -static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); +static DEVICE_ATTR(size, 0444, rbd_size_show, NULL); +static DEVICE_ATTR(features, 0444, rbd_features_show, NULL); +static DEVICE_ATTR(major, 0444, rbd_major_show, NULL); +static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL); +static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL); +static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL); +static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL); +static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL); +static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL); +static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL); +static DEVICE_ATTR(name, 0444, rbd_name_show, NULL); +static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL); +static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh); +static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL); +static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL); +static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL); static struct attribute *rbd_attrs[] = { &dev_attr_size.attr, diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 34997df132e2..09537bee387f 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -247,19 +247,19 @@ static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card) if (IS_ERR_OR_NULL(card->debugfs_dir)) goto failed_debugfs_dir; - debugfs_stats = debugfs_create_file("stats", S_IRUGO, + debugfs_stats = debugfs_create_file("stats", 0444, card->debugfs_dir, card, &debugfs_stats_fops); if (IS_ERR_OR_NULL(debugfs_stats)) goto failed_debugfs_stats; - debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO, + debugfs_pci_regs = debugfs_create_file("pci_regs", 0444, card->debugfs_dir, card, &debugfs_pci_regs_fops); if (IS_ERR_OR_NULL(debugfs_pci_regs)) goto failed_debugfs_pci_regs; - debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR, + debugfs_cram = debugfs_create_file("cram", 0644, card->debugfs_dir, card, &debugfs_cram_fops); if (IS_ERR_OR_NULL(debugfs_cram)) diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 08586dc14e85..4d90e5eba2f5 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host) if (!crq) return NULL; - rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL); + rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0); if (IS_ERR(rq)) { spin_lock_irqsave(&host->lock, flags); carm_put_request(host, crq); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4a07593c2efd..23752dc99b00 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -298,7 +298,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct request *req; int err; - req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL); + req = blk_get_request(q, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev, return err; } -static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); +static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL); /* The queue's logical block size must be set before calling this */ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) @@ -576,10 +576,10 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, } static const struct device_attribute dev_attr_cache_type_ro = - __ATTR(cache_type, S_IRUGO, + __ATTR(cache_type, 0444, virtblk_cache_type_show, NULL); static const struct device_attribute dev_attr_cache_type_rw = - __ATTR(cache_type, S_IRUGO|S_IWUSR, + __ATTR(cache_type, 0644, virtblk_cache_type_show, virtblk_cache_type_store); static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 987d665e82de..b55b245e8052 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -98,7 +98,7 @@ MODULE_PARM_DESC(max_queues, * backend, 4KB page granularity is used. */ unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; -module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); /* * The LRU mechanism to clean the lists of persistent grants needs to diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 21c1be1eb226..66412eededda 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -367,7 +367,7 @@ int __init xen_blkif_interface_init(void) out: \ return sprintf(buf, format, result); \ } \ - static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + static DEVICE_ATTR(name, 0444, show_##name, NULL) VBD_SHOW_ALLRING(oo_req, "%llu\n"); VBD_SHOW_ALLRING(rd_req, "%llu\n"); @@ -403,7 +403,7 @@ static const struct attribute_group xen_vbdstat_group = { \ return sprintf(buf, format, ##args); \ } \ - static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + static DEVICE_ATTR(name, 0444, show_##name, NULL) VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); VBD_SHOW(mode, "%s\n", be->mode); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2a8e7813bd1a..ae00a82f350b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -129,13 +129,12 @@ static const struct block_device_operations xlvbd_block_fops; */ static unsigned int xen_blkif_max_segments = 32; -module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, - S_IRUGO); +module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444); MODULE_PARM_DESC(max_indirect_segments, "Maximum amount of segments in indirect requests (default is 32)"); static unsigned int xen_blkif_max_queues = 4; -module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO); +module_param_named(max_queues, xen_blkif_max_queues, uint, 0444); MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk"); /* @@ -143,7 +142,7 @@ MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per v * backend, 4KB page granularity is used. */ static unsigned int xen_blkif_max_ring_order; -module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); #define BLK_RING_SIZE(info) \ diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index bfc566d3f31a..9adc8c3eb0fa 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2192,7 +2192,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); + rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) { ret = PTR_ERR(rq); break; diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c index a5e2f9e557ea..53436c03dbce 100644 --- a/drivers/char/apm-emulation.c +++ b/drivers/char/apm-emulation.c @@ -461,19 +461,6 @@ static int proc_apm_show(struct seq_file *m, void *v) return 0; } - -static int proc_apm_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_apm_show, NULL); -} - -static const struct file_operations apm_proc_fops = { - .owner = THIS_MODULE, - .open = proc_apm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif static int kapmd(void *arg) @@ -657,7 +644,7 @@ static int __init apm_init(void) wake_up_process(kapmd_tsk); #ifdef CONFIG_PROC_FS - proc_create("apm", 0, NULL, &apm_proc_fops); + proc_create_single("apm", 0, NULL, proc_apm_show); #endif ret = misc_register(&apm_device); diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index eb53cbadb68f..a5ecf6dae02e 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -345,18 +345,6 @@ static int ds1620_proc_therm_show(struct seq_file *m, void *v) fan_state[netwinder_get_fan()]); return 0; } - -static int ds1620_proc_therm_open(struct inode *inode, struct file *file) -{ - return single_open(file, ds1620_proc_therm_show, NULL); -} - -static const struct file_operations ds1620_proc_therm_fops = { - .open = ds1620_proc_therm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif static const struct file_operations ds1620_fops = { @@ -404,7 +392,7 @@ static int __init ds1620_init(void) return ret; #ifdef THERM_USE_PROC - if (!proc_create("therm", 0, NULL, &ds1620_proc_therm_fops)) + if (!proc_create_single("therm", 0, NULL, ds1620_proc_therm_show)) printk(KERN_ERR "therm: unable to register /proc/therm\n"); #endif diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c index dc62568b7dde..d9aab643997e 100644 --- a/drivers/char/efirtc.c +++ b/drivers/char/efirtc.c @@ -358,19 +358,6 @@ static int efi_rtc_proc_show(struct seq_file *m, void *v) return 0; } - -static int efi_rtc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, efi_rtc_proc_show, NULL); -} - -static const struct file_operations efi_rtc_proc_fops = { - .open = efi_rtc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init efi_rtc_init(void) { @@ -386,7 +373,7 @@ efi_rtc_init(void) return ret; } - dir = proc_create("driver/efirtc", 0, NULL, &efi_rtc_proc_fops); + dir = proc_create_single("driver/efirtc", 0, NULL, efi_rtc_proc_show); if (dir == NULL) { printk(KERN_ERR "efirtc: can't create /proc/driver/efirtc.\n"); misc_deregister(&efi_rtc_dev); diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 1bb9e7cc82e3..53cfe574d8d4 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -95,19 +95,6 @@ static const struct seq_operations misc_seq_ops = { .stop = misc_seq_stop, .show = misc_seq_show, }; - -static int misc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &misc_seq_ops); -} - -static const struct file_operations misc_proc_fops = { - .owner = THIS_MODULE, - .open = misc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif static int misc_open(struct inode *inode, struct file *file) @@ -282,7 +269,7 @@ static int __init misc_init(void) int err; struct proc_dir_entry *ret; - ret = proc_create("misc", 0, NULL, &misc_proc_fops); + ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops); misc_class = class_create(THIS_MODULE, "misc"); err = PTR_ERR(misc_class); if (IS_ERR(misc_class)) diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 678fa97e41fb..25264d65e716 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -389,22 +389,9 @@ static int nvram_proc_read(struct seq_file *seq, void *offset) return 0; } -static int nvram_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, nvram_proc_read, NULL); -} - -static const struct file_operations nvram_proc_fops = { - .owner = THIS_MODULE, - .open = nvram_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int nvram_add_proc_fs(void) { - if (!proc_create("driver/nvram", 0, NULL, &nvram_proc_fops)) + if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read)) return -ENOMEM; return 0; } diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index aa502e9fb7fa..66b04194aa9f 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -2616,19 +2616,6 @@ static int mgslpc_proc_show(struct seq_file *m, void *v) return 0; } -static int mgslpc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, mgslpc_proc_show, NULL); -} - -static const struct file_operations mgslpc_proc_fops = { - .owner = THIS_MODULE, - .open = mgslpc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int rx_alloc_buffers(MGSLPC_INFO *info) { /* each buffer has header and data */ @@ -2815,7 +2802,7 @@ static const struct tty_operations mgslpc_ops = { .tiocmget = tiocmget, .tiocmset = tiocmset, .get_icount = mgslpc_get_icount, - .proc_fops = &mgslpc_proc_fops, + .proc_show = mgslpc_proc_show, }; static int __init synclink_cs_init(void) diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 57dc546628b5..94fedeeec035 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -171,7 +171,7 @@ static void mask_rtc_irq_bit(unsigned char bit) #endif #ifdef CONFIG_PROC_FS -static int rtc_proc_open(struct inode *inode, struct file *file); +static int rtc_proc_show(struct seq_file *seq, void *v); #endif /* @@ -832,16 +832,6 @@ static struct miscdevice rtc_dev = { .fops = &rtc_fops, }; -#ifdef CONFIG_PROC_FS -static const struct file_operations rtc_proc_fops = { - .owner = THIS_MODULE, - .open = rtc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - static resource_size_t rtc_size; static struct resource * __init rtc_request_region(resource_size_t size) @@ -982,7 +972,7 @@ no_irq: } #ifdef CONFIG_PROC_FS - ent = proc_create("driver/rtc", 0, NULL, &rtc_proc_fops); + ent = proc_create_single("driver/rtc", 0, NULL, rtc_proc_show); if (!ent) printk(KERN_WARNING "rtc: Failed to register with procfs.\n"); #endif @@ -1201,11 +1191,6 @@ static int rtc_proc_show(struct seq_file *seq, void *v) #undef YN #undef NY } - -static int rtc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rtc_proc_show, NULL); -} #endif static void rtc_get_rtc_time(struct rtc_time *rtc_tm) diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index 5488516da8ea..802376fe851a 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -326,19 +326,6 @@ static int proc_toshiba_show(struct seq_file *m, void *v) key); return 0; } - -static int proc_toshiba_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_toshiba_show, NULL); -} - -static const struct file_operations proc_toshiba_fops = { - .owner = THIS_MODULE, - .open = proc_toshiba_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif @@ -524,7 +511,7 @@ static int __init toshiba_init(void) { struct proc_dir_entry *pde; - pde = proc_create("toshiba", 0, NULL, &proc_toshiba_fops); + pde = proc_create_single("toshiba", 0, NULL, proc_toshiba_show); if (!pde) { misc_deregister(&tosh_device); return -ENOMEM; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 8615594bd065..e718b8c69a56 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -260,19 +260,6 @@ static int cn_proc_show(struct seq_file *m, void *v) return 0; } -static int cn_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cn_proc_show, NULL); -} - -static const struct file_operations cn_file_ops = { - .owner = THIS_MODULE, - .open = cn_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release -}; - static struct cn_dev cdev = { .input = cn_rx_skb, }; @@ -297,7 +284,7 @@ static int cn_init(void) cn_already_initialized = 1; - proc_create("connector", S_IRUGO, init_net.proc_net, &cn_file_ops); + proc_create_single("connector", S_IRUGO, init_net.proc_net, cn_proc_show); return 0; } diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index d4a81be0d7d2..b6be62025325 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -152,8 +152,8 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv) EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS; writel(val, EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL); - memset(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0, - EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32)); + memset_io(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0, + EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32)); eip197_write_firmware(priv, fw[FW_IFPP], EIP197_PE_ICE_FPP_CTRL, EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN); diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index 000c7019ca7d..d64edeb6771a 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -398,7 +398,7 @@ static int __init hidma_mgmt_of_populate_channels(struct device_node *np) } of_node_get(child); new_pdev->dev.of_node = child; - of_dma_configure(&new_pdev->dev, child); + of_dma_configure(&new_pdev->dev, child, true); /* * It is assumed that calling of_msi_configure is safe on * platforms with or without MSI support. diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index dfbd894d5bb7..4e24e591ae74 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -147,7 +147,7 @@ static u32 smc(u32 cmd_addr) "smc #0 @ switch to secure world\n" : "=r" (r0) : "r" (r0), "r" (r1), "r" (r2) - : "r3"); + : "r3", "r12"); } while (r0 == QCOM_SCM_INTERRUPTED); return r0; @@ -263,7 +263,7 @@ static s32 qcom_scm_call_atomic1(u32 svc, u32 cmd, u32 arg1) "smc #0 @ switch to secure world\n" : "=r" (r0) : "r" (r0), "r" (r1), "r" (r2) - : "r3"); + : "r3", "r12"); return r0; } @@ -298,7 +298,7 @@ static s32 qcom_scm_call_atomic2(u32 svc, u32 cmd, u32 arg1, u32 arg2) "smc #0 @ switch to secure world\n" : "=r" (r0) : "r" (r0), "r" (r1), "r" (r2), "r" (r3) - ); + : "r12"); return r0; } @@ -328,7 +328,7 @@ u32 qcom_scm_get_version(void) "smc #0 @ switch to secure world\n" : "=r" (r0), "=r" (r1) : "r" (r0), "r" (r1) - : "r2", "r3"); + : "r2", "r3", "r12"); } while (r0 == QCOM_SCM_INTERRUPTED); version = r1; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1dd1142246c2..27579443cdc5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4555,8 +4555,8 @@ static int dm_update_crtcs_state(struct dc *dc, for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = NULL; struct amdgpu_dm_connector *aconnector = NULL; - struct drm_connector_state *new_con_state = NULL; - struct dm_connector_state *dm_conn_state = NULL; + struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL; + struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL; struct drm_plane_state *new_plane_state = NULL; new_stream = NULL; @@ -4577,19 +4577,23 @@ static int dm_update_crtcs_state(struct dc *dc, /* TODO This hack should go away */ if (aconnector && enable) { // Make sure fake sink is created in plug-in scenario - new_con_state = drm_atomic_get_connector_state(state, + drm_new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base); + drm_old_conn_state = drm_atomic_get_old_connector_state(state, + &aconnector->base); - if (IS_ERR(new_con_state)) { - ret = PTR_ERR_OR_ZERO(new_con_state); + + if (IS_ERR(drm_new_conn_state)) { + ret = PTR_ERR_OR_ZERO(drm_new_conn_state); break; } - dm_conn_state = to_dm_connector_state(new_con_state); + dm_new_conn_state = to_dm_connector_state(drm_new_conn_state); + dm_old_conn_state = to_dm_connector_state(drm_old_conn_state); new_stream = create_stream_for_sink(aconnector, &new_crtc_state->mode, - dm_conn_state); + dm_new_conn_state); /* * we can have no stream on ACTION_SET if a display @@ -4695,20 +4699,30 @@ next_crtc: * We want to do dc stream updates that do not require a * full modeset below. */ - if (!enable || !aconnector || modereset_required(new_crtc_state)) + if (!(enable && aconnector && new_crtc_state->enable && + new_crtc_state->active)) continue; /* * Given above conditions, the dc state cannot be NULL because: - * 1. We're attempting to enable a CRTC. Which has a... - * 2. Valid connector attached, and - * 3. User does not want to reset it (disable or mark inactive, - * which can happen on a CRTC that's already disabled). - * => It currently exists. + * 1. We're in the process of enabling CRTCs (just been added + * to the dc context, or already is on the context) + * 2. Has a valid connector attached, and + * 3. Is currently active and enabled. + * => The dc stream state currently exists. */ BUG_ON(dm_new_crtc_state->stream == NULL); - /* Color managment settings */ - if (dm_new_crtc_state->base.color_mgmt_changed) { + /* Scaling or underscan settings */ + if (is_scaling_state_different(dm_old_conn_state, dm_new_conn_state)) + update_stream_scaling_settings( + &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream); + + /* + * Color management settings. We also update color properties + * when a modeset is needed, to ensure it gets reprogrammed. + */ + if (dm_new_crtc_state->base.color_mgmt_changed || + drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = amdgpu_dm_set_regamma_lut(dm_new_crtc_state); if (ret) goto fail; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index ec8d0006ef7c..3c136f2b954f 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2077,7 +2077,7 @@ static irqreturn_t dw_hdmi_hardirq(int irq, void *dev_id) return ret; } -void __dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense) +void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense) { mutex_lock(&hdmi->mutex); @@ -2103,13 +2103,6 @@ void __dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense) } mutex_unlock(&hdmi->mutex); } - -void dw_hdmi_setup_rx_sense(struct device *dev, bool hpd, bool rx_sense) -{ - struct dw_hdmi *hdmi = dev_get_drvdata(dev); - - __dw_hdmi_setup_rx_sense(hdmi, hpd, rx_sense); -} EXPORT_SYMBOL_GPL(dw_hdmi_setup_rx_sense); static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) @@ -2145,9 +2138,9 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) */ if (intr_stat & (HDMI_IH_PHY_STAT0_RX_SENSE | HDMI_IH_PHY_STAT0_HPD)) { - __dw_hdmi_setup_rx_sense(hdmi, - phy_stat & HDMI_PHY_HPD, - phy_stat & HDMI_PHY_RX_SENSE); + dw_hdmi_setup_rx_sense(hdmi, + phy_stat & HDMI_PHY_HPD, + phy_stat & HDMI_PHY_RX_SENSE); if ((phy_stat & (HDMI_PHY_RX_SENSE | HDMI_PHY_HPD)) == 0) cec_notifier_set_phys_addr(hdmi->cec_notifier, diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index ffe14ec3e7f2..70ae1f232331 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1145,6 +1145,7 @@ int drm_dp_psr_setup_time(const u8 psr_cap[EDP_PSR_RECEIVER_CAP_SIZE]) static const u16 psr_setup_time_us[] = { PSR_SETUP_TIME(330), PSR_SETUP_TIME(275), + PSR_SETUP_TIME(220), PSR_SETUP_TIME(165), PSR_SETUP_TIME(110), PSR_SETUP_TIME(55), diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 3ace929dd90f..3f502eef2431 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -4,6 +4,8 @@ * Copyright © 2018 Intel Corporation */ +#include <linux/nospec.h> + #include "i915_drv.h" #include "i915_query.h" #include <uapi/drm/i915_drm.h> @@ -100,7 +102,7 @@ int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; i < args->num_items; i++, user_item_ptr++) { struct drm_i915_query_item item; - u64 func_idx; + unsigned long func_idx; int ret; if (copy_from_user(&item, user_item_ptr, sizeof(item))) @@ -109,12 +111,17 @@ int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (item.query_id == 0) return -EINVAL; + if (overflows_type(item.query_id - 1, unsigned long)) + return -EINVAL; + func_idx = item.query_id - 1; - if (func_idx < ARRAY_SIZE(i915_query_funcs)) + ret = -EINVAL; + if (func_idx < ARRAY_SIZE(i915_query_funcs)) { + func_idx = array_index_nospec(func_idx, + ARRAY_SIZE(i915_query_funcs)); ret = i915_query_funcs[func_idx](dev_priv, &item); - else - ret = -EINVAL; + } /* Only write the length back to userspace if they differ. */ if (ret != item.length && put_user(ret, &user_item_ptr->length)) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 8691c86f579c..e125d16a1aa7 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -574,6 +574,36 @@ exit: return NOTIFY_OK; } +static int +intel_lvds_connector_register(struct drm_connector *connector) +{ + struct intel_lvds_connector *lvds = to_lvds_connector(connector); + int ret; + + ret = intel_connector_register(connector); + if (ret) + return ret; + + lvds->lid_notifier.notifier_call = intel_lid_notify; + if (acpi_lid_notifier_register(&lvds->lid_notifier)) { + DRM_DEBUG_KMS("lid notifier registration failed\n"); + lvds->lid_notifier.notifier_call = NULL; + } + + return 0; +} + +static void +intel_lvds_connector_unregister(struct drm_connector *connector) +{ + struct intel_lvds_connector *lvds = to_lvds_connector(connector); + + if (lvds->lid_notifier.notifier_call) + acpi_lid_notifier_unregister(&lvds->lid_notifier); + + intel_connector_unregister(connector); +} + /** * intel_lvds_destroy - unregister and free LVDS structures * @connector: connector to free @@ -586,9 +616,6 @@ static void intel_lvds_destroy(struct drm_connector *connector) struct intel_lvds_connector *lvds_connector = to_lvds_connector(connector); - if (lvds_connector->lid_notifier.notifier_call) - acpi_lid_notifier_unregister(&lvds_connector->lid_notifier); - if (!IS_ERR_OR_NULL(lvds_connector->base.edid)) kfree(lvds_connector->base.edid); @@ -609,8 +636,8 @@ static const struct drm_connector_funcs intel_lvds_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, - .late_register = intel_connector_register, - .early_unregister = intel_connector_unregister, + .late_register = intel_lvds_connector_register, + .early_unregister = intel_lvds_connector_unregister, .destroy = intel_lvds_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = intel_digital_connector_duplicate_state, @@ -827,6 +854,14 @@ static const struct dmi_system_id intel_no_lvds[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "D525MW"), }, }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "Radiant P845", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Radiant Systems Inc"), + DMI_MATCH(DMI_PRODUCT_NAME, "P845"), + }, + }, { } /* terminating entry */ }; @@ -1150,12 +1185,6 @@ out: lvds_encoder->a3_power = lvds & LVDS_A3_POWER_MASK; - lvds_connector->lid_notifier.notifier_call = intel_lid_notify; - if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) { - DRM_DEBUG_KMS("lid notifier registration failed\n"); - lvds_connector->lid_notifier.notifier_call = NULL; - } - return; failed: diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index a393095aac1a..c9ad45686e7a 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -529,7 +529,7 @@ static irqreturn_t dw_hdmi_top_thread_irq(int irq, void *dev_id) if (stat & HDMITX_TOP_INTR_HPD_RISE) hpd_connected = true; - dw_hdmi_setup_rx_sense(dw_hdmi->dev, hpd_connected, + dw_hdmi_setup_rx_sense(dw_hdmi->hdmi, hpd_connected, hpd_connected); drm_helper_hpd_irq_event(dw_hdmi->encoder.dev); diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c index 68a40ae26f5b..1e2c931f6acf 100644 --- a/drivers/gpu/drm/omapdrm/dss/sdi.c +++ b/drivers/gpu/drm/omapdrm/dss/sdi.c @@ -82,7 +82,7 @@ static int sdi_calc_clock_div(struct sdi_device *sdi, unsigned long pclk, struct dispc_clock_info *dispc_cinfo) { int i; - struct sdi_clk_calc_ctx ctx = { .sdi = sdi }; + struct sdi_clk_calc_ctx ctx; /* * DSS fclk gives us very few possibilities, so finding a good pixel @@ -95,6 +95,9 @@ static int sdi_calc_clock_div(struct sdi_device *sdi, unsigned long pclk, bool ok; memset(&ctx, 0, sizeof(ctx)); + + ctx.sdi = sdi; + if (pclk > 1000 * i * i * i) ctx.pck_min = max(pclk - 1000 * i * i * i, 0lu); else diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 3d2d3bbd1342..155ad840f3c5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -88,6 +88,9 @@ static int rcar_lvds_connector_atomic_check(struct drm_connector *connector, const struct drm_display_mode *panel_mode; struct drm_crtc_state *crtc_state; + if (!state->crtc) + return 0; + if (list_empty(&connector->modes)) { dev_dbg(lvds->dev, "connector: empty modes list\n"); return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 70e1a8820a7c..8b770a8e02cd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1278,8 +1278,6 @@ static void vmw_master_drop(struct drm_device *dev, dev_priv->active_master = &dev_priv->fbdev_master; ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); ttm_vt_unlock(&dev_priv->fbdev_master.lock); - - vmw_fb_refresh(dev_priv); } /** @@ -1483,7 +1481,6 @@ static int vmw_pm_freeze(struct device *kdev) vmw_kms_resume(dev); if (dev_priv->enable_fb) vmw_fb_on(dev_priv); - vmw_fb_refresh(dev_priv); return -EBUSY; } @@ -1523,8 +1520,6 @@ static int vmw_pm_restore(struct device *kdev) if (dev_priv->enable_fb) vmw_fb_on(dev_priv); - vmw_fb_refresh(dev_priv); - return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index f34f368c1a2e..5fcbe1620d50 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -910,7 +910,6 @@ int vmw_fb_init(struct vmw_private *vmw_priv); int vmw_fb_close(struct vmw_private *dev_priv); int vmw_fb_off(struct vmw_private *vmw_priv); int vmw_fb_on(struct vmw_private *vmw_priv); -void vmw_fb_refresh(struct vmw_private *vmw_priv); /** * Kernel modesetting - vmwgfx_kms.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index ba0cdb743c3e..54e300365a5c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -866,21 +866,13 @@ int vmw_fb_on(struct vmw_private *vmw_priv) spin_lock_irqsave(&par->dirty.lock, flags); par->dirty.active = true; spin_unlock_irqrestore(&par->dirty.lock, flags); - - return 0; -} -/** - * vmw_fb_refresh - Refresh fb display - * - * @vmw_priv: Pointer to device private - * - * Call into kms to show the fbdev display(s). - */ -void vmw_fb_refresh(struct vmw_private *vmw_priv) -{ - if (!vmw_priv->fb_info) - return; + /* + * Need to reschedule a dirty update, because otherwise that's + * only done in dirty_mark() if the previous coalesced + * dirty region was empty. + */ + schedule_delayed_work(&par->local_work, 0); - vmw_fb_set_par(vmw_priv->fb_info); + return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index cdff99211602..21d746bdc922 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -329,8 +329,6 @@ int vmw_host_get_guestinfo(const char *guest_info_param, struct rpc_channel channel; char *msg, *reply = NULL; size_t reply_len = 0; - int ret = 0; - if (!vmw_msg_enabled) return -ENODEV; @@ -344,15 +342,14 @@ int vmw_host_get_guestinfo(const char *guest_info_param, return -ENOMEM; } - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || - vmw_send_msg(&channel, msg) || - vmw_recv_msg(&channel, (void *) &reply, &reply_len) || - vmw_close_channel(&channel)) { - DRM_ERROR("Failed to get %s", guest_info_param); + if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM)) + goto out_open; - ret = -EINVAL; - } + if (vmw_send_msg(&channel, msg) || + vmw_recv_msg(&channel, (void *) &reply, &reply_len)) + goto out_msg; + vmw_close_channel(&channel); if (buffer && reply && reply_len > 0) { /* Remove reply code, which are the first 2 characters of * the reply @@ -369,7 +366,17 @@ int vmw_host_get_guestinfo(const char *guest_info_param, kfree(reply); kfree(msg); - return ret; + return 0; + +out_msg: + vmw_close_channel(&channel); + kfree(reply); +out_open: + *length = 0; + kfree(msg); + DRM_ERROR("Failed to get %s", guest_info_param); + + return -EINVAL; } @@ -400,15 +407,22 @@ int vmw_host_log(const char *log) return -ENOMEM; } - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || - vmw_send_msg(&channel, msg) || - vmw_close_channel(&channel)) { - DRM_ERROR("Failed to send log\n"); + if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM)) + goto out_open; - ret = -EINVAL; - } + if (vmw_send_msg(&channel, msg)) + goto out_msg; + vmw_close_channel(&channel); kfree(msg); - return ret; + return 0; + +out_msg: + vmw_close_channel(&channel); +out_open: + kfree(msg); + DRM_ERROR("Failed to send log\n"); + + return -EINVAL; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h index 557a033fb610..8545488aa0cf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h @@ -135,17 +135,24 @@ #else -/* In the 32-bit version of this macro, we use "m" because there is no - * more register left for bp +/* + * In the 32-bit version of this macro, we store bp in a memory location + * because we've ran out of registers. + * Now we can't reference that memory location while we've modified + * %esp or %ebp, so we first push it on the stack, just before we push + * %ebp, and then when we need it we read it from the stack where we + * just pushed it. */ #define VMW_PORT_HB_OUT(cmd, in_ecx, in_si, in_di, \ port_num, magic, bp, \ eax, ebx, ecx, edx, si, di) \ ({ \ - asm volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + asm volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep outsb;" \ - "pop %%ebp;" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(eax), \ "=b"(ebx), \ "=c"(ecx), \ @@ -167,10 +174,12 @@ port_num, magic, bp, \ eax, ebx, ecx, edx, si, di) \ ({ \ - asm volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + asm volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep insb;" \ - "pop %%ebp" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(eax), \ "=b"(ebx), \ "=c"(ecx), \ diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 88a3558b7916..815bdb42e3f0 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -314,6 +314,11 @@ static int host1x_device_match(struct device *dev, struct device_driver *drv) return strcmp(dev_name(dev), drv->name) == 0; } +static int host1x_dma_configure(struct device *dev) +{ + return of_dma_configure(dev, dev->of_node, true); +} + static const struct dev_pm_ops host1x_device_pm_ops = { .suspend = pm_generic_suspend, .resume = pm_generic_resume, @@ -326,8 +331,8 @@ static const struct dev_pm_ops host1x_device_pm_ops = { struct bus_type host1x_bus_type = { .name = "host1x", .match = host1x_device_match, + .dma_configure = host1x_dma_configure, .pm = &host1x_device_pm_ops, - .force_dma = true, }; static void __host1x_device_del(struct host1x_device *device) @@ -416,7 +421,7 @@ static int host1x_device_add(struct host1x *host1x, device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; - of_dma_configure(&device->dev, host1x->dev->of_node); + of_dma_configure(&device->dev, host1x->dev->of_node, true); err = host1x_device_parse_dt(device, driver); if (err < 0) { diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index ede388309376..634f58042c77 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -733,8 +733,8 @@ err_nomem: /* Reset the page to write-back before releasing */ set_memory_wb((unsigned long)win->block[i].bdesc, 1); #endif - dma_free_coherent(msc_dev(msc), size, win->block[i].bdesc, - win->block[i].addr); + dma_free_coherent(msc_dev(msc)->parent->parent, size, + win->block[i].bdesc, win->block[i].addr); } kfree(win); @@ -769,7 +769,7 @@ static void msc_buffer_win_free(struct msc *msc, struct msc_window *win) /* Reset the page to write-back before releasing */ set_memory_wb((unsigned long)win->block[i].bdesc, 1); #endif - dma_free_coherent(msc_dev(win->msc), PAGE_SIZE, + dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, win->block[i].bdesc, win->block[i].addr); } diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 05386b76465e..10bcb5d73f90 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -19,6 +19,7 @@ #include <linux/stm.h> #include <linux/fs.h> #include <linux/mm.h> +#include <linux/vmalloc.h> #include "stm.h" #include <uapi/linux/stm.h> @@ -674,7 +675,7 @@ static void stm_device_release(struct device *dev) { struct stm_device *stm = to_stm_device(dev); - kfree(stm); + vfree(stm); } int stm_register_device(struct device *parent, struct stm_data *stm_data, @@ -691,7 +692,7 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data, return -EINVAL; nmasters = stm_data->sw_end - stm_data->sw_start + 1; - stm = kzalloc(sizeof(*stm) + nmasters * sizeof(void *), GFP_KERNEL); + stm = vzalloc(sizeof(*stm) + nmasters * sizeof(void *)); if (!stm) return -ENOMEM; @@ -744,7 +745,7 @@ err_device: /* matches device_initialize() above */ put_device(&stm->dev); err_free: - kfree(stm); + vfree(stm); return err; } diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 8c42ca7107b2..45ae3c025bf6 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -1,6 +1,6 @@ /* * i2c-ocores.c: I2C bus driver for OpenCores I2C controller - * (http://www.opencores.org/projects.cgi/web/i2c/overview). + * (https://opencores.org/project/i2c/overview) * * Peter Korsgaard <jacmet@sunsite.dk> * diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 0e6bc631a1ca..8b2b72b93885 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, struct request *rq; int error; - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; rq->special = (char *)pc; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 5a8e8e3c22cd..5f178384876f 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -437,7 +437,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, bool delay = false; rq = blk_get_request(drive->queue, - write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB); ide_req(rq)->type = ATA_PRIV_PC; rq->rq_flags |= rq_flags; @@ -1426,21 +1426,8 @@ static int idecd_capacity_proc_show(struct seq_file *m, void *v) return 0; } -static int idecd_capacity_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idecd_capacity_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idecd_capacity_proc_fops = { - .owner = THIS_MODULE, - .open = idecd_capacity_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static ide_proc_entry_t idecd_proc[] = { - { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, idecd_capacity_proc_show }, {} }; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 2acca12b9c94..b1322400887b 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) struct request *rq; int ret; - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; rq->rq_flags = RQF_QUIET; blk_execute_rq(drive->queue, cd->disk, rq, 0); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 4e20747af32e..f4f8afdf8bbe 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, if (!(setting->flags & DS_SYNC)) return setting->set(drive, arg); - rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(q, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd_len = 5; scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index f1a7c58fe418..e3b4e659082d 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -478,7 +478,7 @@ static int set_multcount(ide_drive_t *drive, int arg) if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) return -EBUSY; - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_TASKFILE; drive->mult_req = arg; diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c index 82a36ced4e96..95d239b2f646 100644 --- a/drivers/ide/ide-disk_proc.c +++ b/drivers/ide/ide-disk_proc.c @@ -52,19 +52,6 @@ static int idedisk_cache_proc_show(struct seq_file *m, void *v) return 0; } -static int idedisk_cache_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idedisk_cache_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idedisk_cache_proc_fops = { - .owner = THIS_MODULE, - .open = idedisk_cache_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int idedisk_capacity_proc_show(struct seq_file *m, void *v) { ide_drive_t*drive = (ide_drive_t *)m->private; @@ -73,19 +60,6 @@ static int idedisk_capacity_proc_show(struct seq_file *m, void *v) return 0; } -static int idedisk_capacity_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idedisk_capacity_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idedisk_capacity_proc_fops = { - .owner = THIS_MODULE, - .open = idedisk_capacity_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd) { u8 *buf; @@ -114,43 +88,17 @@ static int idedisk_sv_proc_show(struct seq_file *m, void *v) return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES); } -static int idedisk_sv_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idedisk_sv_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idedisk_sv_proc_fops = { - .owner = THIS_MODULE, - .open = idedisk_sv_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int idedisk_st_proc_show(struct seq_file *m, void *v) { return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS); } -static int idedisk_st_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idedisk_st_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idedisk_st_proc_fops = { - .owner = THIS_MODULE, - .open = idedisk_st_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - ide_proc_entry_t ide_disk_proc[] = { - { "cache", S_IFREG|S_IRUGO, &idedisk_cache_proc_fops }, - { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops }, - { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, - { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops }, - { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops }, + { "cache", S_IFREG|S_IRUGO, idedisk_cache_proc_show }, + { "capacity", S_IFREG|S_IRUGO, idedisk_capacity_proc_show }, + { "geometry", S_IFREG|S_IRUGO, ide_geometry_proc_show }, + { "smart_values", S_IFREG|S_IRUSR, idedisk_sv_proc_show }, + { "smart_thresholds", S_IFREG|S_IRUSR, idedisk_st_proc_show }, {} }; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 54d4d78ca46a..6f344654ef22 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -180,7 +180,6 @@ EXPORT_SYMBOL_GPL(ide_dma_unmap_sg); void ide_dma_off_quietly(ide_drive_t *drive) { drive->dev_flags &= ~IDE_DFLAG_USING_DMA; - ide_toggle_bounce(drive, 0); drive->hwif->dma_ops->dma_host_set(drive, 0); } @@ -211,7 +210,6 @@ EXPORT_SYMBOL(ide_dma_off); void ide_dma_on(ide_drive_t *drive) { drive->dev_flags |= IDE_DFLAG_USING_DMA; - ide_toggle_bounce(drive, 1); drive->hwif->dma_ops->dma_host_set(drive, 1); } diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c index 471457ebea67..7f697ddb5fe5 100644 --- a/drivers/ide/ide-floppy_proc.c +++ b/drivers/ide/ide-floppy_proc.c @@ -14,22 +14,9 @@ static int idefloppy_capacity_proc_show(struct seq_file *m, void *v) return 0; } -static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idefloppy_capacity_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idefloppy_capacity_proc_fops = { - .owner = THIS_MODULE, - .open = idefloppy_capacity_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - ide_proc_entry_t ide_floppy_proc[] = { - { "capacity", S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops }, - { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, idefloppy_capacity_proc_show }, + { "geometry", S_IFREG|S_IRUGO, ide_geometry_proc_show }, {} }; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 3661abb16a5f..af5119a73689 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) if (NULL == (void *) arg) { struct request *rq; - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_TASKFILE; blk_execute_rq(drive->queue, NULL, rq, 0); err = scsi_req(rq)->result ? -EIO : 0; @@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive) struct request *rq; int ret = 0; - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd_len = 1; scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index e1180fa46196..78cb79eddc8b 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -6,32 +6,6 @@ #include <linux/ide.h> #include <linux/bitops.h> -/** - * ide_toggle_bounce - handle bounce buffering - * @drive: drive to update - * @on: on/off boolean - * - * Enable or disable bounce buffering for the device. Drives move - * between PIO and DMA and that changes the rules we need. - */ - -void ide_toggle_bounce(ide_drive_t *drive, int on) -{ - u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ - - if (!PCI_DMA_BUS_IS_PHYS) { - addr = BLK_BOUNCE_ANY; - } else if (on && drive->media == ide_disk) { - struct device *dev = drive->hwif->dev; - - if (dev && dev->dma_mask) - addr = *dev->dma_mask; - } - - if (drive->queue) - blk_queue_bounce_limit(drive->queue, addr); -} - u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48) { struct ide_taskfile *tf = &cmd->tf; diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 6465bcc7cea6..622f0edb3945 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -32,7 +32,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) } spin_unlock_irq(&hwif->lock); - rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(q, REQ_OP_DRV_IN, 0); scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; @@ -47,7 +47,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) * Make sure that *some* command is sent to the drive after the * timeout has expired, so power management will be reenabled. */ - rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT); + rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT); if (IS_ERR(rq)) goto out; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index ad8a125defdd..59217aa1d1fb 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; @@ -90,8 +90,7 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN, - BLK_MQ_REQ_PREEMPT); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); ide_req(rq)->type = ATA_PRIV_PM_RESUME; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 2019e66eada7..56d7bc228cb3 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -796,8 +796,7 @@ static int ide_init_queue(ide_drive_t *drive) * This will be fixed once we teach pci_map_sg() about our boundary * requirements, hopefully soon. *FIXME* */ - if (!PCI_DMA_BUS_IS_PHYS) - max_sg_entries >>= 1; + max_sg_entries >>= 1; #endif /* CONFIG_PCI */ blk_queue_max_segments(q, max_sg_entries); @@ -805,9 +804,6 @@ static int ide_init_queue(ide_drive_t *drive) /* assign drive queue */ drive->queue = q; - /* needs drive->queue to be set */ - ide_toggle_bounce(drive, 1); - return 0; } diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 863db44c7916..45c997430332 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -56,19 +56,6 @@ static int ide_imodel_proc_show(struct seq_file *m, void *v) return 0; } -static int ide_imodel_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_imodel_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ide_imodel_proc_fops = { - .owner = THIS_MODULE, - .open = ide_imodel_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ide_mate_proc_show(struct seq_file *m, void *v) { ide_hwif_t *hwif = (ide_hwif_t *) m->private; @@ -80,19 +67,6 @@ static int ide_mate_proc_show(struct seq_file *m, void *v) return 0; } -static int ide_mate_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_mate_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ide_mate_proc_fops = { - .owner = THIS_MODULE, - .open = ide_mate_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ide_channel_proc_show(struct seq_file *m, void *v) { ide_hwif_t *hwif = (ide_hwif_t *) m->private; @@ -101,19 +75,6 @@ static int ide_channel_proc_show(struct seq_file *m, void *v) return 0; } -static int ide_channel_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_channel_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ide_channel_proc_fops = { - .owner = THIS_MODULE, - .open = ide_channel_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ide_identify_proc_show(struct seq_file *m, void *v) { ide_drive_t *drive = (ide_drive_t *)m->private; @@ -141,19 +102,6 @@ static int ide_identify_proc_show(struct seq_file *m, void *v) return 0; } -static int ide_identify_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_identify_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ide_identify_proc_fops = { - .owner = THIS_MODULE, - .open = ide_identify_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /** * ide_find_setting - find a specific setting * @st: setting table pointer @@ -441,27 +389,14 @@ static const struct file_operations ide_settings_proc_fops = { .write = ide_settings_proc_write, }; -static int ide_capacity_proc_show(struct seq_file *m, void *v) +int ide_capacity_proc_show(struct seq_file *m, void *v) { seq_printf(m, "%llu\n", (long long)0x7fffffff); return 0; } +EXPORT_SYMBOL_GPL(ide_capacity_proc_show); -static int ide_capacity_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_capacity_proc_show, NULL); -} - -const struct file_operations ide_capacity_proc_fops = { - .owner = THIS_MODULE, - .open = ide_capacity_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -EXPORT_SYMBOL_GPL(ide_capacity_proc_fops); - -static int ide_geometry_proc_show(struct seq_file *m, void *v) +int ide_geometry_proc_show(struct seq_file *m, void *v) { ide_drive_t *drive = (ide_drive_t *) m->private; @@ -471,20 +406,7 @@ static int ide_geometry_proc_show(struct seq_file *m, void *v) drive->bios_cyl, drive->bios_head, drive->bios_sect); return 0; } - -static int ide_geometry_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_geometry_proc_show, PDE_DATA(inode)); -} - -const struct file_operations ide_geometry_proc_fops = { - .owner = THIS_MODULE, - .open = ide_geometry_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -EXPORT_SYMBOL(ide_geometry_proc_fops); +EXPORT_SYMBOL(ide_geometry_proc_show); static int ide_dmodel_proc_show(struct seq_file *seq, void *v) { @@ -495,19 +417,6 @@ static int ide_dmodel_proc_show(struct seq_file *seq, void *v) return 0; } -static int ide_dmodel_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_dmodel_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ide_dmodel_proc_fops = { - .owner = THIS_MODULE, - .open = ide_dmodel_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ide_driver_proc_show(struct seq_file *m, void *v) { ide_drive_t *drive = (ide_drive_t *)m->private; @@ -523,65 +432,6 @@ static int ide_driver_proc_show(struct seq_file *m, void *v) return 0; } -static int ide_driver_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ide_driver_proc_show, PDE_DATA(inode)); -} - -static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) -{ - struct device *dev = &drive->gendev; - int ret = 1; - int err; - - device_release_driver(dev); - /* FIXME: device can still be in use by previous driver */ - strlcpy(drive->driver_req, driver, sizeof(drive->driver_req)); - err = device_attach(dev); - if (err < 0) - printk(KERN_WARNING "IDE: %s: device_attach error: %d\n", - __func__, err); - drive->driver_req[0] = 0; - if (dev->driver == NULL) { - err = device_attach(dev); - if (err < 0) - printk(KERN_WARNING - "IDE: %s: device_attach(2) error: %d\n", - __func__, err); - } - if (dev->driver && !strcmp(dev->driver->name, driver)) - ret = 0; - - return ret; -} - -static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer, - size_t count, loff_t *pos) -{ - ide_drive_t *drive = PDE_DATA(file_inode(file)); - char name[32]; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (count > 31) - count = 31; - if (copy_from_user(name, buffer, count)) - return -EFAULT; - name[count] = '\0'; - if (ide_replace_subdriver(drive, name)) - return -EINVAL; - return count; -} - -static const struct file_operations ide_driver_proc_fops = { - .owner = THIS_MODULE, - .open = ide_driver_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = ide_driver_proc_write, -}; - static int ide_media_proc_show(struct seq_file *m, void *v) { ide_drive_t *drive = (ide_drive_t *) m->private; @@ -613,11 +463,10 @@ static const struct file_operations ide_media_proc_fops = { }; static ide_proc_entry_t generic_drive_entries[] = { - { "driver", S_IFREG|S_IRUGO, &ide_driver_proc_fops }, - { "identify", S_IFREG|S_IRUSR, &ide_identify_proc_fops}, - { "media", S_IFREG|S_IRUGO, &ide_media_proc_fops }, - { "model", S_IFREG|S_IRUGO, &ide_dmodel_proc_fops }, - { "settings", S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops}, + { "driver", S_IFREG|S_IRUGO, ide_driver_proc_show }, + { "identify", S_IFREG|S_IRUSR, ide_identify_proc_show }, + { "media", S_IFREG|S_IRUGO, ide_media_proc_show }, + { "model", S_IFREG|S_IRUGO, ide_dmodel_proc_show }, {} }; @@ -628,7 +477,7 @@ static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p if (!dir || !p) return; while (p->name != NULL) { - ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data); + ent = proc_create_single_data(p->name, p->mode, dir, p->show, data); if (!ent) return; p++; } @@ -693,8 +542,12 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif) continue; drive->proc = proc_mkdir(drive->name, parent); - if (drive->proc) + if (drive->proc) { ide_add_proc_entries(drive->proc, generic_drive_entries, drive); + proc_create_data("setting", S_IFREG|S_IRUSR|S_IWUSR, + drive->proc, &ide_settings_proc_fops, + drive); + } sprintf(name, "ide%d/%s", (drive->name[2]-'a')/2, drive->name); ent = proc_symlink(drive->name, proc_ide_root, name); if (!ent) return; @@ -704,6 +557,7 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif) void ide_proc_unregister_device(ide_drive_t *drive) { if (drive->proc) { + remove_proc_entry("settings", drive->proc); ide_remove_proc_entries(drive->proc, generic_drive_entries); remove_proc_entry(drive->name, proc_ide_root); remove_proc_entry(drive->name, drive->hwif->proc); @@ -712,9 +566,9 @@ void ide_proc_unregister_device(ide_drive_t *drive) } static ide_proc_entry_t hwif_entries[] = { - { "channel", S_IFREG|S_IRUGO, &ide_channel_proc_fops }, - { "mate", S_IFREG|S_IRUGO, &ide_mate_proc_fops }, - { "model", S_IFREG|S_IRUGO, &ide_imodel_proc_fops }, + { "channel", S_IFREG|S_IRUGO, ide_channel_proc_show }, + { "mate", S_IFREG|S_IRUGO, ide_mate_proc_show }, + { "model", S_IFREG|S_IRUGO, ide_imodel_proc_show }, {} }; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index fd57e8ccc47a..aee7b46d2330 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -854,7 +854,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); BUG_ON(size < 0 || size % tape->blk_size); - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd[13] = cmd; rq->rq_disk = tape->disk; @@ -862,7 +862,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) if (size) { ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, - __GFP_RECLAIM); + GFP_NOIO); if (ret) goto out_put; } @@ -1847,22 +1847,9 @@ static int idetape_name_proc_show(struct seq_file *m, void *v) return 0; } -static int idetape_name_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, idetape_name_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations idetape_name_proc_fops = { - .owner = THIS_MODULE, - .open = idetape_name_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static ide_proc_entry_t idetape_proc[] = { - { "capacity", S_IFREG|S_IRUGO, &ide_capacity_proc_fops }, - { "name", S_IFREG|S_IRUGO, &idetape_name_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, ide_capacity_proc_show }, + { "name", S_IFREG|S_IRUGO, idetape_name_proc_show }, {} }; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index abe0822dd429..c034cd965831 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -431,7 +431,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq = blk_get_request(drive->queue, (cmd->tf_flags & IDE_TFLAG_WRITE) ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_TASKFILE; /* @@ -442,7 +442,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, */ if (nsect) { error = blk_rq_map_kern(drive->queue, rq, buf, - nsect * SECTOR_SIZE, __GFP_RECLAIM); + nsect * SECTOR_SIZE, GFP_NOIO); if (error) goto put_req; } diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 15606f237480..9da79070357c 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -158,6 +158,7 @@ config AT91_SAMA5D2_ADC depends on ARCH_AT91 || COMPILE_TEST depends on HAS_IOMEM depends on HAS_DMA + select IIO_BUFFER select IIO_TRIGGERED_BUFFER help Say yes here to build support for Atmel SAMA5D2 ADC which is diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 801afb61310b..d4bbe5b53318 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -348,55 +348,6 @@ static const u16 ad7793_sample_freq_avail[16] = {0, 470, 242, 123, 62, 50, 39, static const u16 ad7797_sample_freq_avail[16] = {0, 0, 0, 123, 62, 50, 0, 33, 0, 17, 16, 12, 10, 8, 6, 4}; -static ssize_t ad7793_read_frequency(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct ad7793_state *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", - st->chip_info->sample_freq_avail[AD7793_MODE_RATE(st->mode)]); -} - -static ssize_t ad7793_write_frequency(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t len) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct ad7793_state *st = iio_priv(indio_dev); - long lval; - int i, ret; - - ret = kstrtol(buf, 10, &lval); - if (ret) - return ret; - - if (lval == 0) - return -EINVAL; - - for (i = 0; i < 16; i++) - if (lval == st->chip_info->sample_freq_avail[i]) - break; - if (i == 16) - return -EINVAL; - - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - st->mode &= ~AD7793_MODE_RATE(-1); - st->mode |= AD7793_MODE_RATE(i); - ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), st->mode); - iio_device_release_direct_mode(indio_dev); - - return len; -} - -static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, - ad7793_read_frequency, - ad7793_write_frequency); - static IIO_CONST_ATTR_SAMP_FREQ_AVAIL( "470 242 123 62 50 39 33 19 17 16 12 10 8 6 4"); @@ -424,7 +375,6 @@ static IIO_DEVICE_ATTR_NAMED(in_m_in_scale_available, ad7793_show_scale_available, NULL, 0); static struct attribute *ad7793_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_const_attr_sampling_frequency_available.dev_attr.attr, &iio_dev_attr_in_m_in_scale_available.dev_attr.attr, NULL @@ -435,7 +385,6 @@ static const struct attribute_group ad7793_attribute_group = { }; static struct attribute *ad7797_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_const_attr_sampling_frequency_available_ad7797.dev_attr.attr, NULL }; @@ -505,6 +454,10 @@ static int ad7793_read_raw(struct iio_dev *indio_dev, *val -= offset; } return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->chip_info + ->sample_freq_avail[AD7793_MODE_RATE(st->mode)]; + return IIO_VAL_INT; } return -EINVAL; } @@ -542,6 +495,26 @@ static int ad7793_write_raw(struct iio_dev *indio_dev, break; } break; + case IIO_CHAN_INFO_SAMP_FREQ: + if (!val) { + ret = -EINVAL; + break; + } + + for (i = 0; i < 16; i++) + if (val == st->chip_info->sample_freq_avail[i]) + break; + + if (i == 16) { + ret = -EINVAL; + break; + } + + st->mode &= ~AD7793_MODE_RATE(-1); + st->mode |= AD7793_MODE_RATE(i); + ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), + st->mode); + break; default: ret = -EINVAL; } diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 4eff8351ce29..8729d6524b4d 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -333,6 +333,27 @@ static const struct iio_chan_spec at91_adc_channels[] = { + AT91_SAMA5D2_DIFF_CHAN_CNT + 1), }; +static int at91_adc_chan_xlate(struct iio_dev *indio_dev, int chan) +{ + int i; + + for (i = 0; i < indio_dev->num_channels; i++) { + if (indio_dev->channels[i].scan_index == chan) + return i; + } + return -EINVAL; +} + +static inline struct iio_chan_spec const * +at91_adc_chan_get(struct iio_dev *indio_dev, int chan) +{ + int index = at91_adc_chan_xlate(indio_dev, chan); + + if (index < 0) + return NULL; + return indio_dev->channels + index; +} + static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) { struct iio_dev *indio = iio_trigger_get_drvdata(trig); @@ -350,8 +371,10 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) at91_adc_writel(st, AT91_SAMA5D2_TRGR, status); for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { - struct iio_chan_spec const *chan = indio->channels + bit; + struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit); + if (!chan) + continue; if (state) { at91_adc_writel(st, AT91_SAMA5D2_CHER, BIT(chan->channel)); @@ -448,7 +471,11 @@ static int at91_adc_dma_start(struct iio_dev *indio_dev) for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->num_channels) { - struct iio_chan_spec const *chan = indio_dev->channels + bit; + struct iio_chan_spec const *chan = + at91_adc_chan_get(indio_dev, bit); + + if (!chan) + continue; st->dma_st.rx_buf_sz += chan->scan_type.storagebits / 8; } @@ -526,8 +553,11 @@ static int at91_adc_buffer_predisable(struct iio_dev *indio_dev) */ for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->num_channels) { - struct iio_chan_spec const *chan = indio_dev->channels + bit; + struct iio_chan_spec const *chan = + at91_adc_chan_get(indio_dev, bit); + if (!chan) + continue; if (st->dma_st.dma_chan) at91_adc_readl(st, chan->address); } @@ -587,8 +617,11 @@ static void at91_adc_trigger_handler_nodma(struct iio_dev *indio_dev, for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->num_channels) { - struct iio_chan_spec const *chan = indio_dev->channels + bit; + struct iio_chan_spec const *chan = + at91_adc_chan_get(indio_dev, bit); + if (!chan) + continue; st->buffer[i] = at91_adc_readl(st, chan->address); i++; } diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 01422d11753c..b28a716a23b2 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -144,6 +144,7 @@ static int stm32_dfsdm_set_osrs(struct stm32_dfsdm_filter *fl, * Leave as soon as if exact resolution if reached. * Otherwise the higher resolution below 32 bits is kept. */ + fl->res = 0; for (fosr = 1; fosr <= DFSDM_MAX_FL_OVERSAMPLING; fosr++) { for (iosr = 1; iosr <= DFSDM_MAX_INT_OVERSAMPLING; iosr++) { if (fast) @@ -193,7 +194,7 @@ static int stm32_dfsdm_set_osrs(struct stm32_dfsdm_filter *fl, } } - if (!fl->fosr) + if (!fl->res) return -EINVAL; return 0; @@ -770,7 +771,7 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id]; struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[chan->channel]; - unsigned int spi_freq = adc->spi_freq; + unsigned int spi_freq; int ret = -EINVAL; switch (mask) { @@ -784,8 +785,18 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SAMP_FREQ: if (!val) return -EINVAL; - if (ch->src != DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL) + + switch (ch->src) { + case DFSDM_CHANNEL_SPI_CLOCK_INTERNAL: spi_freq = adc->dfsdm->spi_master_freq; + break; + case DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_FALLING: + case DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_RISING: + spi_freq = adc->dfsdm->spi_master_freq / 2; + break; + default: + spi_freq = adc->spi_freq; + } if (spi_freq % val) dev_warn(&indio_dev->dev, diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c index 05e0c353e089..b32bf57910ca 100644 --- a/drivers/iio/buffer/industrialio-buffer-dma.c +++ b/drivers/iio/buffer/industrialio-buffer-dma.c @@ -587,7 +587,7 @@ EXPORT_SYMBOL_GPL(iio_dma_buffer_set_bytes_per_datum); * Should be used as the set_length callback for iio_buffer_access_ops * struct for DMA buffers. */ -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length) +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length) { /* Avoid an invalid state */ if (length < 2) diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c index 047fe757ab97..70c302a93d7f 100644 --- a/drivers/iio/buffer/kfifo_buf.c +++ b/drivers/iio/buffer/kfifo_buf.c @@ -22,11 +22,18 @@ struct iio_kfifo { #define iio_to_kfifo(r) container_of(r, struct iio_kfifo, buffer) static inline int __iio_allocate_kfifo(struct iio_kfifo *buf, - int bytes_per_datum, int length) + size_t bytes_per_datum, unsigned int length) { if ((length == 0) || (bytes_per_datum == 0)) return -EINVAL; + /* + * Make sure we don't overflow an unsigned int after kfifo rounds up to + * the next power of 2. + */ + if (roundup_pow_of_two(length) > UINT_MAX / bytes_per_datum) + return -EINVAL; + return __kfifo_alloc((struct __kfifo *)&buf->kf, length, bytes_per_datum, GFP_KERNEL); } @@ -67,7 +74,7 @@ static int iio_set_bytes_per_datum_kfifo(struct iio_buffer *r, size_t bpd) return 0; } -static int iio_set_length_kfifo(struct iio_buffer *r, int length) +static int iio_set_length_kfifo(struct iio_buffer *r, unsigned int length) { /* Avoid an invalid state */ if (length < 2) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index cfb6588565ba..4905a997a7ec 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -178,14 +178,14 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) #ifdef CONFIG_PM int ret; - atomic_set(&st->user_requested_state, state); - if (atomic_add_unless(&st->runtime_pm_enable, 1, 1)) pm_runtime_enable(&st->pdev->dev); - if (state) + if (state) { + atomic_inc(&st->user_requested_state); ret = pm_runtime_get_sync(&st->pdev->dev); - else { + } else { + atomic_dec(&st->user_requested_state); pm_runtime_mark_last_busy(&st->pdev->dev); pm_runtime_use_autosuspend(&st->pdev->dev); ret = pm_runtime_put_autosuspend(&st->pdev->dev); diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index fb2d347f760f..ecc55e98ddd3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -502,7 +502,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, return -EINVAL; if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) - return -EAGAIN; + return -EINVAL; memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); if (attr) { diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 9a4e899d94b3..2b6c9b516070 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -119,7 +119,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; - umem->pid = get_task_pid(current, PIDTYPE_PID); /* * We ask for writable memory if any of the following * access flags are set. "Local write" and "remote write" @@ -132,7 +131,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { - put_pid(umem->pid); ret = ib_umem_odp_get(context, umem, access); if (ret) { kfree(umem); @@ -148,7 +146,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - put_pid(umem->pid); kfree(umem); return ERR_PTR(-ENOMEM); } @@ -231,7 +228,6 @@ out: if (ret < 0) { if (need_release) __ib_umem_release(context->device, umem, 0); - put_pid(umem->pid); kfree(umem); } else current->mm->pinned_vm = locked; @@ -274,8 +270,7 @@ void ib_umem_release(struct ib_umem *umem) __ib_umem_release(umem->context->device, umem, 1); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); + task = get_pid_task(umem->context->tgid, PIDTYPE_PID); if (!task) goto out; mm = get_task_mm(task); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f6c739ec8b62..20b9f31052bf 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -185,12 +185,65 @@ static void bnxt_re_shutdown(void *p) bnxt_re_ib_unreg(rdev, false); } +static void bnxt_re_stop_irq(void *handle) +{ + struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle; + struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw; + struct bnxt_qplib_nq *nq; + int indx; + + for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) { + nq = &rdev->nq[indx - 1]; + bnxt_qplib_nq_stop_irq(nq, false); + } + + bnxt_qplib_rcfw_stop_irq(rcfw, false); +} + +static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) +{ + struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle; + struct bnxt_msix_entry *msix_ent = rdev->msix_entries; + struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw; + struct bnxt_qplib_nq *nq; + int indx, rc; + + if (!ent) { + /* Not setting the f/w timeout bit in rcfw. + * During the driver unload the first command + * to f/w will timeout and that will set the + * timeout bit. + */ + dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n"); + return; + } + + /* Vectors may change after restart, so update with new vectors + * in device sctructure. + */ + for (indx = 0; indx < rdev->num_msix; indx++) + rdev->msix_entries[indx].vector = ent[indx].vector; + + bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, + false); + for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { + nq = &rdev->nq[indx - 1]; + rc = bnxt_qplib_nq_start_irq(nq, indx - 1, + msix_ent[indx].vector, false); + if (rc) + dev_warn(rdev_to_dev(rdev), + "Failed to reinit NQ index %d\n", indx - 1); + } +} + static struct bnxt_ulp_ops bnxt_re_ulp_ops = { .ulp_async_notifier = NULL, .ulp_stop = bnxt_re_stop, .ulp_start = bnxt_re_start, .ulp_sriov_config = bnxt_re_sriov_config, - .ulp_shutdown = bnxt_re_shutdown + .ulp_shutdown = bnxt_re_shutdown, + .ulp_irq_stop = bnxt_re_stop_irq, + .ulp_irq_restart = bnxt_re_start_irq }; /* RoCE -> Net driver */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 3a78faba8d91..50d8f1fc98d5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -336,22 +336,32 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) return IRQ_HANDLED; } +void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) +{ + tasklet_disable(&nq->worker); + /* Mask h/w interrupt */ + NQ_DB(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + /* Sync with last running IRQ handler */ + synchronize_irq(nq->vector); + if (kill) + tasklet_kill(&nq->worker); + if (nq->requested) { + irq_set_affinity_hint(nq->vector, NULL); + free_irq(nq->vector, nq); + nq->requested = false; + } +} + void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) { if (nq->cqn_wq) { destroy_workqueue(nq->cqn_wq); nq->cqn_wq = NULL; } + /* Make sure the HW is stopped! */ - synchronize_irq(nq->vector); - tasklet_disable(&nq->worker); - tasklet_kill(&nq->worker); + bnxt_qplib_nq_stop_irq(nq, true); - if (nq->requested) { - irq_set_affinity_hint(nq->vector, NULL); - free_irq(nq->vector, nq); - nq->requested = false; - } if (nq->bar_reg_iomem) iounmap(nq->bar_reg_iomem); nq->bar_reg_iomem = NULL; @@ -361,6 +371,40 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) nq->vector = 0; } +int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, + int msix_vector, bool need_init) +{ + int rc; + + if (nq->requested) + return -EFAULT; + + nq->vector = msix_vector; + if (need_init) + tasklet_init(&nq->worker, bnxt_qplib_service_nq, + (unsigned long)nq); + else + tasklet_enable(&nq->worker); + + snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx); + rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq); + if (rc) + return rc; + + cpumask_clear(&nq->mask); + cpumask_set_cpu(nq_indx, &nq->mask); + rc = irq_set_affinity_hint(nq->vector, &nq->mask); + if (rc) { + dev_warn(&nq->pdev->dev, + "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", + nq->vector, nq_indx); + } + nq->requested = true; + NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + + return rc; +} + int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, int nq_idx, int msix_vector, int bar_reg_offset, int (*cqn_handler)(struct bnxt_qplib_nq *nq, @@ -372,41 +416,17 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, resource_size_t nq_base; int rc = -1; - nq->pdev = pdev; - nq->vector = msix_vector; if (cqn_handler) nq->cqn_handler = cqn_handler; if (srqn_handler) nq->srqn_handler = srqn_handler; - tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq); - /* Have a task to schedule CQ notifiers in post send case */ nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq"); if (!nq->cqn_wq) - goto fail; - - nq->requested = false; - memset(nq->name, 0, 32); - sprintf(nq->name, "bnxt_qplib_nq-%d", nq_idx); - rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq); - if (rc) { - dev_err(&nq->pdev->dev, - "Failed to request IRQ for NQ: %#x", rc); - goto fail; - } - - cpumask_clear(&nq->mask); - cpumask_set_cpu(nq_idx, &nq->mask); - rc = irq_set_affinity_hint(nq->vector, &nq->mask); - if (rc) { - dev_warn(&nq->pdev->dev, - "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", - nq->vector, nq_idx); - } + return -ENOMEM; - nq->requested = true; nq->bar_reg = NQ_CONS_PCI_BAR_REGION; nq->bar_reg_off = bar_reg_offset; nq_base = pci_resource_start(pdev, nq->bar_reg); @@ -419,7 +439,13 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, rc = -ENOMEM; goto fail; } - NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); + + rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); + if (rc) { + dev_err(&nq->pdev->dev, + "QPLIB: Failed to request irq for nq-idx %d", nq_idx); + goto fail; + } return 0; fail: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index ade9f13c0fd1..72352ca80ace 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -467,7 +467,10 @@ struct bnxt_qplib_nq_work { struct bnxt_qplib_cq *cq; }; +void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill); void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq); +int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, + int msix_vector, bool need_init); int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, int nq_idx, int msix_vector, int bar_reg_offset, int (*cqn_handler)(struct bnxt_qplib_nq *nq, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 80027a494730..2852d350ada1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -582,19 +582,29 @@ fail: return -ENOMEM; } -void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) +void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) { - unsigned long indx; - - /* Make sure the HW channel is stopped! */ - synchronize_irq(rcfw->vector); tasklet_disable(&rcfw->worker); - tasklet_kill(&rcfw->worker); + /* Mask h/w interrupts */ + CREQ_DB(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, + rcfw->creq.max_elements); + /* Sync with last running IRQ-handler */ + synchronize_irq(rcfw->vector); + if (kill) + tasklet_kill(&rcfw->worker); if (rcfw->requested) { free_irq(rcfw->vector, rcfw); rcfw->requested = false; } +} + +void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) +{ + unsigned long indx; + + bnxt_qplib_rcfw_stop_irq(rcfw, true); + if (rcfw->cmdq_bar_reg_iomem) iounmap(rcfw->cmdq_bar_reg_iomem); rcfw->cmdq_bar_reg_iomem = NULL; @@ -614,6 +624,31 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) rcfw->vector = 0; } +int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, + bool need_init) +{ + int rc; + + if (rcfw->requested) + return -EFAULT; + + rcfw->vector = msix_vector; + if (need_init) + tasklet_init(&rcfw->worker, + bnxt_qplib_service_creq, (unsigned long)rcfw); + else + tasklet_enable(&rcfw->worker); + rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0, + "bnxt_qplib_creq", rcfw); + if (rc) + return rc; + rcfw->requested = true; + CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, + rcfw->creq.max_elements); + + return 0; +} + int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, int msix_vector, @@ -675,27 +710,17 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, rcfw->creq_qp_event_processed = 0; rcfw->creq_func_event_processed = 0; - rcfw->vector = msix_vector; if (aeq_handler) rcfw->aeq_handler = aeq_handler; + init_waitqueue_head(&rcfw->waitq); - tasklet_init(&rcfw->worker, bnxt_qplib_service_creq, - (unsigned long)rcfw); - - rcfw->requested = false; - rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0, - "bnxt_qplib_creq", rcfw); + rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true); if (rc) { dev_err(&rcfw->pdev->dev, "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc); bnxt_qplib_disable_rcfw_channel(rcfw); return rc; } - rcfw->requested = true; - - init_waitqueue_head(&rcfw->waitq); - - CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, 0, rcfw->creq.max_elements); init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]); init.cmdq_size_cmdq_lvl = cpu_to_le16( diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index c7cce2e4185e..46416dfe8830 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -195,7 +195,10 @@ struct bnxt_qplib_rcfw { void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); +void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); +int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, + bool need_init); int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, int msix_vector, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index e90f2fd8dc16..1445918e3239 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) err_dereg_mem: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); -err_free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); err_free_skb: kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); err_free_mhp: kfree(mhp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e6a60fa59f2b..e6bdd0c1e80a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5944,6 +5944,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, u64 status; u32 sw_index; int i = 0; + unsigned long irq_flags; sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { @@ -5953,10 +5954,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, return; } sci = &dd->send_contexts[sw_index]; + spin_lock_irqsave(&dd->sc_lock, irq_flags); sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, sw_index, hw_context); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); return; } @@ -5978,6 +5981,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, */ if (sc->type != SC_USER) queue_work(dd->pport->hfi1_wq, &sc->halt_work); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); /* * Update the counters for the corresponding status bits. diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 14734d0d0b76..3a485f50fede 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->set_ci_db = hr_cq->db.db_record; *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; } /* Init mmt table and write buff address to mtt table */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 47e1b6ac1e1a..8013d69c5ac4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_pd = to_hr_pd(pd); free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; free_mr->mr_free_pd->ibpd.uobject = NULL; + free_mr->mr_free_pd->ibpd.__internal_mr = NULL; atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0); attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE; @@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) do { ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); - if (ret < 0) { + if (ret < 0 && hr_qp) { dev_err(dev, "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n", hr_qp->qpn, ret, hr_mr->key, ne); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 25916e8522ed..1f0965bb64ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -142,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; unsigned int ind; void *wqe = NULL; - u32 tmp_len = 0; bool loopback; + u32 tmp_len; int ret = 0; u8 *smac; int nreq; @@ -189,6 +189,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + tmp_len = 0; /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -547,16 +548,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } if (i < hr_qp->rq.max_gs) { - dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg[i].addr = 0; + dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg->addr = 0; } /* rq support inline data */ - sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; - hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge; - for (i = 0; i < wr->num_sge; i++) { - sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = wr->sg_list[i].length; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; + hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = + (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = + (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } } hr_qp->rq.wrid[ind] = wr->wr_id; @@ -613,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, ring->desc_num * sizeof(struct hns_roce_cmq_desc), DMA_BIDIRECTIONAL); + + ring->desc_dma_addr = 0; kfree(ring->desc); } @@ -1081,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) if (ret) { dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", ret); + return ret; } /* Get pf resource owned by every pf */ @@ -1372,6 +1380,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, mr->type == MR_TYPE_MR ? 0 : 1); + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, + 1); mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); @@ -2169,6 +2179,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); /* @@ -2281,7 +2292,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, context->rq_db_record_addr = hr_qp->rdb.dma >> 32; qpc_mask->rq_db_record_addr = 0; - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1); + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, + (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, @@ -4703,6 +4715,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {0, } }; +MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); + static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 9d48bc07a9e6..96fb6a9ed93c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -199,7 +199,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, memset(props, 0, sizeof(*props)); - props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid); + props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid); props->max_mr_size = (u64)(~(0ULL)); props->page_size_cap = hr_dev->caps.page_size_cap; props->vendor_id = hr_dev->vendor_id; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d4aad34c21e2..baaf906f7c2e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_rq_sge_list; } *hr_qp->rdb.db_record = 0; + hr_qp->rdb_en = 1; } /* Allocate QP buf */ @@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (cur_state == new_state && cur_state == IB_QPS_RESET) { - ret = 0; + if (hr_dev->caps.min_wqes) { + ret = -EPERM; + dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, + new_state); + } else { + ret = 0; + } + goto out; } diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index d5d8c1be345a..2f2b4426ded7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -207,6 +207,7 @@ struct i40iw_msix_vector { u32 irq; u32 cpu_affinity; u32 ceq_id; + cpumask_t mask; }; struct l2params_work { diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4cfa8f4647e2..f7c6fd9ff6e2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2093,7 +2093,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, if (netif_is_bond_slave(netdev)) netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(dst, &dst_addr); + neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32); rcu_read_lock(); if (neigh) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 6139836fb533..c9f62ca7643c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) switch (info->ae_id) { case I40IW_AE_LLP_FIN_RECEIVED: if (qp->term_flags) - continue; + break; if (atomic_inc_return(&iwqp->close_timer_started) == 1) { iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT; if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) && @@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; case I40IW_AE_LLP_CONNECTION_RESET: if (atomic_read(&iwqp->close_timer_started)) - continue; + break; i40iw_cm_disconn(iwqp); break; case I40IW_AE_QP_SUSPEND_COMPLETE: diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 9cd0d3ef9057..05001e6da1f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw struct i40iw_msix_vector *msix_vec) { enum i40iw_status_code status; - cpumask_t mask; if (iwdev->msix_shared && !ceq_id) { tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); @@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } - cpumask_clear(&mask); - cpumask_set_cpu(msix_vec->cpu_affinity, &mask); - irq_set_affinity_hint(msix_vec->irq, &mask); + cpumask_clear(&msix_vec->mask); + cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask); + irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask); if (status) { i40iw_pr_err("ceq irq config fail\n"); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 40e4f5ab2b46..68679ad4c6da 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, list_for_each_entry(iwpbl, pbl_list, list) { if (iwpbl->user_base == va) { + iwpbl->on_list = false; list_del(&iwpbl->list); return iwpbl; } @@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); iwqp = (struct i40iw_qp *)mem; + iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; @@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->allocated_buffer = mem; iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; @@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, goto error; spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_CQ: @@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_MEM: @@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, switch (iwmr->type) { case IW_MEMREG_TYPE_CQ: spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->cq_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_QP: spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->qp_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; default: diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 9067443cd311..76cf173377ab 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -78,6 +78,7 @@ struct i40iw_pbl { }; bool pbl_allocated; + bool on_list; u64 user_base; struct i40iw_pble_alloc pble_alloc; struct i40iw_mr *iwmr; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b4d8ff8ab807..69716a7ea993 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2416,7 +2416,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } -static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, +static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, bool inner) { if (inner) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 87b7c1be2a11..2193dc1765fb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -484,11 +484,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return 1; } -static int first_med_bfreg(void) -{ - return 1; -} - enum { /* this is the first blue flame register in the array of bfregs assigned * to a processes. Since we do not use it for blue flame but rather @@ -514,6 +509,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev, return n >= 0 ? n : 0; } +static int first_med_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) +{ + return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM; +} + static int first_hi_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { @@ -541,10 +542,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev, static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { - int minidx = first_med_bfreg(); + int minidx = first_med_bfreg(dev, bfregi); int i; - for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) { + if (minidx < 0) + return minidx; + + for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) { if (bfregi->count[i] < bfregi->count[minidx]) minidx = i; if (!bfregi->count[minidx]) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7d3763b2e01c..3f9afc02d166 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct qedr_ucontext *ucontext = get_qedr_ucontext(context); struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; - u64 unmapped_db = dev->db_phys_addr; + unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; unsigned long len = (vma->vm_end - vma->vm_start); - int rc = 0; - bool found; + unsigned long dpi_start; + + dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); DP_DEBUG(dev, QEDR_MSG_INIT, - "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", - vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len); - if (vma->vm_start & (PAGE_SIZE - 1)) { - DP_ERR(dev, "Vma_start not page aligned = %ld\n", - vma->vm_start); + "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", + (void *)vma->vm_start, (void *)vma->vm_end, + (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + + if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { + DP_ERR(dev, + "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n", + (void *)vma->vm_start, (void *)vma->vm_end); return -EINVAL; } - found = qedr_search_mmap(ucontext, vm_page, len); - if (!found) { - DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n", + if (!qedr_search_mmap(ucontext, phys_addr, len)) { + DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", vma->vm_pgoff); return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - - if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + - dev->db_size))) { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "Trying to map doorbell bar for read\n"); - return -EPERM; - } - - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (phys_addr < dpi_start || + ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { + DP_ERR(dev, + "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", + (void *)phys_addr, (void *)dpi_start, + ucontext->dpi_size); + return -EINVAL; + } - rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - PAGE_SIZE, vma->vm_page_prot); - } else { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n"); - rc = remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff, len, vma->vm_page_prot); + if (vma->vm_flags & VM_READ) { + DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); + return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc); - return rc; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, + vma->vm_page_prot); } struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 2cb52fd48cf1..73a00a1c06f6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; - int must_sched; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, wr = wr->next; } - /* - * Must sched in case of GSI QP because ib_send_mad() hold irq lock, - * and the requester call ip_local_out_sk() that takes spin_lock_bh. - */ - must_sched = (qp_type(qp) == IB_QPT_GSI) || - (queue_count(qp->sq.queue) > 1); - - rxe_run_task(&qp->req.task, must_sched); + rxe_run_task(&qp->req.task, 1); if (unlikely(qp->req.state == QP_STATE_ERROR)) rxe_run_task(&qp->comp.task, 1); diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index 49b34de0aed4..47eb8ca729fe 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -509,18 +509,6 @@ static int hp_sdc_rtc_proc_show(struct seq_file *m, void *v) #undef NY } -static int hp_sdc_rtc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, hp_sdc_rtc_proc_show, NULL); -} - -static const struct file_operations hp_sdc_rtc_proc_fops = { - .open = hp_sdc_rtc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int hp_sdc_rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -713,7 +701,7 @@ static int __init hp_sdc_rtc_init(void) if (misc_register(&hp_sdc_rtc_dev) != 0) printk(KERN_INFO "Could not register misc. dev for i8042 rtc\n"); - proc_create("driver/rtc", 0, NULL, &hp_sdc_rtc_proc_fops); + proc_create_single("driver/rtc", 0, NULL, hp_sdc_rtc_proc_show); printk(KERN_INFO "HP i8042 SDC + MSM-58321 RTC support loaded " "(RTC v " RTC_VERSION ")\n"); diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c index 29f99529b187..cfcb32559925 100644 --- a/drivers/input/mouse/elan_i2c_smbus.c +++ b/drivers/input/mouse/elan_i2c_smbus.c @@ -130,7 +130,7 @@ static int elan_smbus_get_baseline_data(struct i2c_client *client, bool max_baseline, u8 *value) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, max_baseline ? @@ -149,7 +149,7 @@ static int elan_smbus_get_version(struct i2c_client *client, bool iap, u8 *version) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, iap ? ETP_SMBUS_IAP_VERSION_CMD : @@ -170,7 +170,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, u8 *clickpad) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_SM_VERSION_CMD, val); @@ -188,7 +188,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, static int elan_smbus_get_product_id(struct i2c_client *client, u16 *id) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_UNIQUEID_CMD, val); @@ -205,7 +205,7 @@ static int elan_smbus_get_checksum(struct i2c_client *client, bool iap, u16 *csum) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, iap ? ETP_SMBUS_FW_CHECKSUM_CMD : @@ -226,7 +226,7 @@ static int elan_smbus_get_max(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val); if (ret != 3) { @@ -246,7 +246,7 @@ static int elan_smbus_get_resolution(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RESOLUTION_CMD, val); if (ret != 3) { @@ -267,7 +267,7 @@ static int elan_smbus_get_num_traces(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_XY_TRACENUM_CMD, val); if (ret != 3) { @@ -294,7 +294,7 @@ static int elan_smbus_iap_get_mode(struct i2c_client *client, { int error; u16 constant; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val); if (error < 0) { @@ -345,7 +345,7 @@ static int elan_smbus_prepare_fw_update(struct i2c_client *client) int len; int error; enum tp_mode mode; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06}; u16 password; @@ -419,7 +419,7 @@ static int elan_smbus_write_fw_block(struct i2c_client *client, struct device *dev = &client->dev; int error; u16 result; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* * Due to the limitation of smbus protocol limiting diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 60f2c463d1cc..a9591d278145 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,6 +172,12 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN0071", /* T480 */ + "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ + "LEN0073", /* X1 Carbon G5 (Elantech) */ + "LEN0092", /* X1 Carbon 6 */ + "LEN0096", /* X280 */ + "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ NULL }; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index df171cb85822..5b714a062fa7 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -146,6 +146,7 @@ config INTEL_IOMMU select DMA_DIRECT_OPS select IOMMU_API select IOMMU_IOVA + select NEED_DMA_MAP_STATE select DMAR_TABLE help DMA remapping (DMAR) devices support enables independent address diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 19cd93783c87..baa1ee2bc2ac 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -1340,19 +1340,6 @@ static int capi20_proc_show(struct seq_file *m, void *v) return 0; } -static int capi20_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, capi20_proc_show, NULL); -} - -static const struct file_operations capi20_proc_fops = { - .owner = THIS_MODULE, - .open = capi20_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * /proc/capi/capi20ncci: * applid ncci @@ -1373,23 +1360,10 @@ static int capi20ncci_proc_show(struct seq_file *m, void *v) return 0; } -static int capi20ncci_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, capi20ncci_proc_show, NULL); -} - -static const struct file_operations capi20ncci_proc_fops = { - .owner = THIS_MODULE, - .open = capi20ncci_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void __init proc_init(void) { - proc_create("capi/capi20", 0, NULL, &capi20_proc_fops); - proc_create("capi/capi20ncci", 0, NULL, &capi20ncci_proc_fops); + proc_create_single("capi/capi20", 0, NULL, capi20_proc_show); + proc_create_single("capi/capi20ncci", 0, NULL, capi20ncci_proc_show); } static void __exit proc_exit(void) diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index 49fef08858c5..7ac51798949d 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -2460,22 +2460,9 @@ static int capidrv_proc_show(struct seq_file *m, void *v) return 0; } -static int capidrv_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, capidrv_proc_show, NULL); -} - -static const struct file_operations capidrv_proc_fops = { - .owner = THIS_MODULE, - .open = capidrv_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void __init proc_init(void) { - proc_create("capi/capidrv", 0, NULL, &capidrv_proc_fops); + proc_create_single("capi/capidrv", 0, NULL, capidrv_proc_show); } static void __exit proc_exit(void) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 46c189ad8d94..0ff517d3c98f 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -534,7 +534,8 @@ int attach_capi_ctr(struct capi_ctr *ctr) init_waitqueue_head(&ctr->state_wait_queue); sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr); - ctr->procent = proc_create_data(ctr->procfn, 0, NULL, ctr->proc_fops, ctr); + ctr->procent = proc_create_single_data(ctr->procfn, 0, NULL, + ctr->proc_show, ctr); ncontrollers++; diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c index 68db3c5a1063..c94bd12c0f7c 100644 --- a/drivers/isdn/capi/kcapi_proc.c +++ b/drivers/isdn/capi/kcapi_proc.c @@ -108,32 +108,6 @@ static const struct seq_operations seq_contrstats_ops = { .show = contrstats_show, }; -static int seq_controller_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &seq_controller_ops); -} - -static int seq_contrstats_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &seq_contrstats_ops); -} - -static const struct file_operations proc_controller_ops = { - .owner = THIS_MODULE, - .open = seq_controller_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations proc_contrstats_ops = { - .owner = THIS_MODULE, - .open = seq_contrstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - // /proc/capi/applications: // applid l3cnt dblkcnt dblklen #ncci recvqueuelen // /proc/capi/applstats: @@ -216,34 +190,6 @@ static const struct seq_operations seq_applstats_ops = { .show = applstats_show, }; -static int -seq_applications_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &seq_applications_ops); -} - -static int -seq_applstats_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &seq_applstats_ops); -} - -static const struct file_operations proc_applications_ops = { - .owner = THIS_MODULE, - .open = seq_applications_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations proc_applstats_ops = { - .owner = THIS_MODULE, - .open = seq_applstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - // --------------------------------------------------------------------------- static void *capi_driver_start(struct seq_file *seq, loff_t *pos) @@ -279,22 +225,6 @@ static const struct seq_operations seq_capi_driver_ops = { .show = capi_driver_show, }; -static int -seq_capi_driver_open(struct inode *inode, struct file *file) -{ - int err; - err = seq_open(file, &seq_capi_driver_ops); - return err; -} - -static const struct file_operations proc_driver_ops = { - .owner = THIS_MODULE, - .open = seq_capi_driver_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - // --------------------------------------------------------------------------- void __init @@ -302,11 +232,11 @@ kcapi_proc_init(void) { proc_mkdir("capi", NULL); proc_mkdir("capi/controllers", NULL); - proc_create("capi/controller", 0, NULL, &proc_controller_ops); - proc_create("capi/contrstats", 0, NULL, &proc_contrstats_ops); - proc_create("capi/applications", 0, NULL, &proc_applications_ops); - proc_create("capi/applstats", 0, NULL, &proc_applstats_ops); - proc_create("capi/driver", 0, NULL, &proc_driver_ops); + proc_create_seq("capi/controller", 0, NULL, &seq_controller_ops); + proc_create_seq("capi/contrstats", 0, NULL, &seq_contrstats_ops); + proc_create_seq("capi/applications", 0, NULL, &seq_applications_ops); + proc_create_seq("capi/applstats", 0, NULL, &seq_applstats_ops); + proc_create_seq("capi/driver", 0, NULL, &seq_capi_driver_ops); } void __exit diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c index ccec7778cad2..56748af78c04 100644 --- a/drivers/isdn/gigaset/capi.c +++ b/drivers/isdn/gigaset/capi.c @@ -2437,19 +2437,6 @@ static int gigaset_proc_show(struct seq_file *m, void *v) return 0; } -static int gigaset_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, gigaset_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations gigaset_proc_fops = { - .owner = THIS_MODULE, - .open = gigaset_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /** * gigaset_isdn_regdev() - register device to LL * @cs: device descriptor structure. @@ -2479,7 +2466,7 @@ int gigaset_isdn_regdev(struct cardstate *cs, const char *isdnid) iif->ctr.release_appl = gigaset_release_appl; iif->ctr.send_message = gigaset_send_message; iif->ctr.procinfo = gigaset_procinfo; - iif->ctr.proc_fops = &gigaset_proc_fops; + iif->ctr.proc_show = gigaset_proc_show, INIT_LIST_HEAD(&iif->appls); skb_queue_head_init(&iif->sendqueue); atomic_set(&iif->sendqlen, 0); diff --git a/drivers/isdn/hardware/avm/avmcard.h b/drivers/isdn/hardware/avm/avmcard.h index c95712dbfa9f..cdfa89c71997 100644 --- a/drivers/isdn/hardware/avm/avmcard.h +++ b/drivers/isdn/hardware/avm/avmcard.h @@ -556,7 +556,7 @@ u16 b1_send_message(struct capi_ctr *ctrl, struct sk_buff *skb); void b1_parse_version(avmctrl_info *card); irqreturn_t b1_interrupt(int interrupt, void *devptr); -extern const struct file_operations b1ctl_proc_fops; +int b1_proc_show(struct seq_file *m, void *v); avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *, long rsize, long ssize); @@ -576,6 +576,6 @@ void b1dma_register_appl(struct capi_ctr *ctrl, capi_register_params *rp); void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl); u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb); -extern const struct file_operations b1dmactl_proc_fops; +int b1dma_proc_show(struct seq_file *m, void *v); #endif /* _AVMCARD_H_ */ diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index b1833d08a5fe..5ee5489d3f15 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -637,7 +637,7 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr) } /* ------------------------------------------------------------- */ -static int b1ctl_proc_show(struct seq_file *m, void *v) +int b1_proc_show(struct seq_file *m, void *v) { struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); @@ -699,20 +699,7 @@ static int b1ctl_proc_show(struct seq_file *m, void *v) return 0; } - -static int b1ctl_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, b1ctl_proc_show, PDE_DATA(inode)); -} - -const struct file_operations b1ctl_proc_fops = { - .owner = THIS_MODULE, - .open = b1ctl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -EXPORT_SYMBOL(b1ctl_proc_fops); +EXPORT_SYMBOL(b1_proc_show); /* ------------------------------------------------------------- */ diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index 9538a9e5e1a8..6a3dc9937ce5 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -858,7 +858,7 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) /* ------------------------------------------------------------- */ -static int b1dmactl_proc_show(struct seq_file *m, void *v) +int b1dma_proc_show(struct seq_file *m, void *v) { struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); @@ -941,20 +941,7 @@ static int b1dmactl_proc_show(struct seq_file *m, void *v) return 0; } - -static int b1dmactl_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, b1dmactl_proc_show, PDE_DATA(inode)); -} - -const struct file_operations b1dmactl_proc_fops = { - .owner = THIS_MODULE, - .open = b1dmactl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -EXPORT_SYMBOL(b1dmactl_proc_fops); +EXPORT_SYMBOL(b1dma_proc_show); /* ------------------------------------------------------------- */ diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c index 54e871a47387..cdfea72e0ef6 100644 --- a/drivers/isdn/hardware/avm/b1isa.c +++ b/drivers/isdn/hardware/avm/b1isa.c @@ -121,7 +121,7 @@ static int b1isa_probe(struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1isa_procinfo; - cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; + cinfo->capi_ctrl.proc_show = b1_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c index ac4863c2ecbc..b76b57a82c02 100644 --- a/drivers/isdn/hardware/avm/b1pci.c +++ b/drivers/isdn/hardware/avm/b1pci.c @@ -112,7 +112,7 @@ static int b1pci_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1pci_procinfo; - cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; + cinfo->capi_ctrl.proc_show = b1_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); cinfo->capi_ctrl.owner = THIS_MODULE; @@ -251,7 +251,7 @@ static int b1pciv4_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1dma_load_firmware; cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr; cinfo->capi_ctrl.procinfo = b1pciv4_procinfo; - cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops; + cinfo->capi_ctrl.proc_show = b1dma_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c index 6b0d19d963d5..3aca16e62902 100644 --- a/drivers/isdn/hardware/avm/b1pcmcia.c +++ b/drivers/isdn/hardware/avm/b1pcmcia.c @@ -108,7 +108,7 @@ static int b1pcmcia_add_card(unsigned int port, unsigned irq, cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1pcmcia_procinfo; - cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; + cinfo->capi_ctrl.proc_show = b1_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 034cabac699d..ac72cd204c4d 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -1127,19 +1127,6 @@ static int c4_proc_show(struct seq_file *m, void *v) return 0; } -static int c4_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, c4_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations c4_proc_fops = { - .owner = THIS_MODULE, - .open = c4_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* ------------------------------------------------------------- */ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, @@ -1211,7 +1198,7 @@ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, cinfo->capi_ctrl.load_firmware = c4_load_firmware; cinfo->capi_ctrl.reset_ctr = c4_reset_ctr; cinfo->capi_ctrl.procinfo = c4_procinfo; - cinfo->capi_ctrl.proc_fops = &c4_proc_fops; + cinfo->capi_ctrl.proc_show = c4_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c index 9f80d20ced87..2153619c5b31 100644 --- a/drivers/isdn/hardware/avm/t1isa.c +++ b/drivers/isdn/hardware/avm/t1isa.c @@ -430,7 +430,7 @@ static int t1isa_probe(struct pci_dev *pdev, int cardnr) cinfo->capi_ctrl.load_firmware = t1isa_load_firmware; cinfo->capi_ctrl.reset_ctr = t1isa_reset_ctr; cinfo->capi_ctrl.procinfo = t1isa_procinfo; - cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; + cinfo->capi_ctrl.proc_show = b1_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c index 2180b1685691..f5ed1d5004c9 100644 --- a/drivers/isdn/hardware/avm/t1pci.c +++ b/drivers/isdn/hardware/avm/t1pci.c @@ -119,7 +119,7 @@ static int t1pci_add_card(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1dma_load_firmware; cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr; cinfo->capi_ctrl.procinfo = t1pci_procinfo; - cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops; + cinfo->capi_ctrl.proc_show = b1dma_proc_show; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c index be36d82004d6..f9244dc1c3c9 100644 --- a/drivers/isdn/hardware/eicon/capimain.c +++ b/drivers/isdn/hardware/eicon/capimain.c @@ -90,19 +90,6 @@ static int diva_ctl_proc_show(struct seq_file *m, void *v) return 0; } -static int diva_ctl_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, diva_ctl_proc_show, NULL); -} - -static const struct file_operations diva_ctl_proc_fops = { - .owner = THIS_MODULE, - .open = diva_ctl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * set additional os settings in capi_ctr struct */ @@ -111,7 +98,7 @@ void diva_os_set_controller_struct(struct capi_ctr *ctrl) ctrl->driver_name = DRIVERLNAME; ctrl->load_firmware = NULL; ctrl->reset_ctr = NULL; - ctrl->proc_fops = &diva_ctl_proc_fops; + ctrl->proc_show = diva_ctl_proc_show; ctrl->owner = THIS_MODULE; } diff --git a/drivers/isdn/hardware/eicon/diva.c b/drivers/isdn/hardware/eicon/diva.c index 944a7f338099..1b25d8bc153a 100644 --- a/drivers/isdn/hardware/eicon/diva.c +++ b/drivers/isdn/hardware/eicon/diva.c @@ -388,10 +388,10 @@ void divasa_xdi_driver_unload(void) ** Receive and process command from user mode utility */ void *diva_xdi_open_adapter(void *os_handle, const void __user *src, - int length, + int length, void *mptr, divas_xdi_copy_from_user_fn_t cp_fn) { - diva_xdi_um_cfg_cmd_t msg; + diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr; diva_os_xdi_adapter_t *a = NULL; diva_os_spin_lock_magic_t old_irql; struct list_head *tmp; @@ -401,21 +401,21 @@ void *diva_xdi_open_adapter(void *os_handle, const void __user *src, length, sizeof(diva_xdi_um_cfg_cmd_t))) return NULL; } - if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) { + if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) { DBG_ERR(("A: A(?) open, write error")) return NULL; } diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter"); list_for_each(tmp, &adapter_queue) { a = list_entry(tmp, diva_os_xdi_adapter_t, link); - if (a->controller == (int)msg.adapter) + if (a->controller == (int)msg->adapter) break; a = NULL; } diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter"); if (!a) { - DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter)) + DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter)) } return (a); @@ -437,8 +437,10 @@ void diva_xdi_close_adapter(void *adapter, void *os_handle) int diva_xdi_write(void *adapter, void *os_handle, const void __user *src, - int length, divas_xdi_copy_from_user_fn_t cp_fn) + int length, void *mptr, + divas_xdi_copy_from_user_fn_t cp_fn) { + diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr; diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter; void *data; @@ -459,7 +461,13 @@ diva_xdi_write(void *adapter, void *os_handle, const void __user *src, return (-2); } - length = (*cp_fn) (os_handle, data, src, length); + if (msg) { + *(diva_xdi_um_cfg_cmd_t *)data = *msg; + length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg), + src + sizeof(*msg), length - sizeof(*msg)); + } else { + length = (*cp_fn) (os_handle, data, src, length); + } if (length > 0) { if ((*(a->interface.cmd_proc)) (a, (diva_xdi_um_cfg_cmd_t *) data, length)) { diff --git a/drivers/isdn/hardware/eicon/diva.h b/drivers/isdn/hardware/eicon/diva.h index b067032093a8..1ad76650fbf9 100644 --- a/drivers/isdn/hardware/eicon/diva.h +++ b/drivers/isdn/hardware/eicon/diva.h @@ -20,10 +20,11 @@ int diva_xdi_read(void *adapter, void *os_handle, void __user *dst, int max_length, divas_xdi_copy_to_user_fn_t cp_fn); int diva_xdi_write(void *adapter, void *os_handle, const void __user *src, - int length, divas_xdi_copy_from_user_fn_t cp_fn); + int length, void *msg, + divas_xdi_copy_from_user_fn_t cp_fn); void *diva_xdi_open_adapter(void *os_handle, const void __user *src, - int length, + int length, void *msg, divas_xdi_copy_from_user_fn_t cp_fn); void diva_xdi_close_adapter(void *adapter, void *os_handle); diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c index fab6ccfb00d5..60e79257dd5f 100644 --- a/drivers/isdn/hardware/eicon/diva_didd.c +++ b/drivers/isdn/hardware/eicon/diva_didd.c @@ -78,26 +78,13 @@ static int divadidd_proc_show(struct seq_file *m, void *v) return 0; } -static int divadidd_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, divadidd_proc_show, NULL); -} - -static const struct file_operations divadidd_proc_fops = { - .owner = THIS_MODULE, - .open = divadidd_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init create_proc(void) { proc_net_eicon = proc_mkdir("eicon", init_net.proc_net); if (proc_net_eicon) { - proc_didd = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon, - &divadidd_proc_fops); + proc_didd = proc_create_single(DRIVERLNAME, S_IRUGO, + proc_net_eicon, divadidd_proc_show); return (1); } return (0); diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c index 525518c945fe..e7081e0c0e35 100644 --- a/drivers/isdn/hardware/eicon/divasi.c +++ b/drivers/isdn/hardware/eicon/divasi.c @@ -101,23 +101,10 @@ static int um_idi_proc_show(struct seq_file *m, void *v) return 0; } -static int um_idi_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, um_idi_proc_show, NULL); -} - -static const struct file_operations um_idi_proc_fops = { - .owner = THIS_MODULE, - .open = um_idi_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init create_um_idi_proc(void) { - um_idi_proc_entry = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon, - &um_idi_proc_fops); + um_idi_proc_entry = proc_create_single(DRIVERLNAME, S_IRUGO, + proc_net_eicon, um_idi_proc_show); if (!um_idi_proc_entry) return (0); return (1); diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index b9980e84f9db..b6a3950b2564 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -591,19 +591,22 @@ static int divas_release(struct inode *inode, struct file *file) static ssize_t divas_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + diva_xdi_um_cfg_cmd_t msg; int ret = -EINVAL; if (!file->private_data) { file->private_data = diva_xdi_open_adapter(file, buf, - count, + count, &msg, xdi_copy_from_user); - } - if (!file->private_data) { - return (-ENODEV); + if (!file->private_data) + return (-ENODEV); + ret = diva_xdi_write(file->private_data, file, + buf, count, &msg, xdi_copy_from_user); + } else { + ret = diva_xdi_write(file->private_data, file, + buf, count, NULL, xdi_copy_from_user); } - ret = diva_xdi_write(file->private_data, file, - buf, count, xdi_copy_from_user); switch (ret) { case -1: /* Message should be removed from rx mailbox first */ ret = -EBUSY; @@ -622,11 +625,12 @@ static ssize_t divas_write(struct file *file, const char __user *buf, static ssize_t divas_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + diva_xdi_um_cfg_cmd_t msg; int ret = -EINVAL; if (!file->private_data) { file->private_data = diva_xdi_open_adapter(file, buf, - count, + count, &msg, xdi_copy_from_user); } if (!file->private_data) { diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index eac0f51a0f60..a2c15cd7bf67 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -467,19 +467,6 @@ static int hycapi_proc_show(struct seq_file *m, void *v) return 0; } -static int hycapi_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, hycapi_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations hycapi_proc_fops = { - .owner = THIS_MODULE, - .open = hycapi_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /************************************************************** hycapi_load_firmware @@ -774,7 +761,7 @@ hycapi_capi_create(hysdn_card *card) ctrl->load_firmware = hycapi_load_firmware; ctrl->reset_ctr = hycapi_reset_ctr; ctrl->procinfo = hycapi_procinfo; - ctrl->proc_fops = &hycapi_proc_fops; + ctrl->proc_show = hycapi_proc_show; strcpy(ctrl->name, cinfo->cardname); ctrl->owner = THIS_MODULE; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 63171cdce270..60aa7bc5a630 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -431,7 +431,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return 0; err_sysfs: if (tt->exit) - tt->exit(targetdata); + tt->exit(targetdata, true); err_init: blk_cleanup_queue(tqueue); tdisk->queue = NULL; @@ -446,7 +446,7 @@ err_reserve: return ret; } -static void __nvm_remove_target(struct nvm_target *t) +static void __nvm_remove_target(struct nvm_target *t, bool graceful) { struct nvm_tgt_type *tt = t->type; struct gendisk *tdisk = t->disk; @@ -459,7 +459,7 @@ static void __nvm_remove_target(struct nvm_target *t) tt->sysfs_exit(tdisk); if (tt->exit) - tt->exit(tdisk->private_data); + tt->exit(tdisk->private_data, graceful); nvm_remove_tgt_dev(t->dev, 1); put_disk(tdisk); @@ -489,7 +489,7 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) mutex_unlock(&dev->mlock); return 1; } - __nvm_remove_target(t); + __nvm_remove_target(t, true); mutex_unlock(&dev->mlock); return 0; @@ -963,7 +963,7 @@ void nvm_unregister(struct nvm_dev *dev) list_for_each_entry_safe(t, tmp, &dev->targets, list) { if (t->dev->parent != dev) continue; - __nvm_remove_target(t); + __nvm_remove_target(t, false); } mutex_unlock(&dev->mlock); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 29a23111b31c..b1c6d7eb6115 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -44,13 +44,15 @@ retry: goto out; } - if (unlikely(!bio_has_data(bio))) - goto out; - pblk_ppa_set_empty(&w_ctx.ppa); w_ctx.flags = flags; - if (bio->bi_opf & REQ_PREFLUSH) + if (bio->bi_opf & REQ_PREFLUSH) { w_ctx.flags |= PBLK_FLUSH_ENTRY; + pblk_write_kick(pblk); + } + + if (unlikely(!bio_has_data(bio))) + goto out; for (i = 0; i < nr_entries; i++) { void *data = bio_data(bio); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 94d5d97c9d8a..ed9cc977c8b3 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -40,7 +40,7 @@ static void pblk_line_mark_bb(struct work_struct *work) } kfree(ppa); - mempool_free(line_ws, pblk->gen_ws_pool); + mempool_free(line_ws, &pblk->gen_ws_pool); } static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, @@ -102,7 +102,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) struct pblk *pblk = rqd->private; __pblk_end_io_erase(pblk, rqd); - mempool_free(rqd, pblk->e_rq_pool); + mempool_free(rqd, &pblk->e_rq_pool); } /* @@ -237,15 +237,15 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) switch (type) { case PBLK_WRITE: case PBLK_WRITE_INT: - pool = pblk->w_rq_pool; + pool = &pblk->w_rq_pool; rq_size = pblk_w_rq_size; break; case PBLK_READ: - pool = pblk->r_rq_pool; + pool = &pblk->r_rq_pool; rq_size = pblk_g_rq_size; break; default: - pool = pblk->e_rq_pool; + pool = &pblk->e_rq_pool; rq_size = pblk_g_rq_size; } @@ -265,20 +265,22 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) case PBLK_WRITE: kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap); case PBLK_WRITE_INT: - pool = pblk->w_rq_pool; + pool = &pblk->w_rq_pool; break; case PBLK_READ: - pool = pblk->r_rq_pool; + pool = &pblk->r_rq_pool; break; case PBLK_ERASE: - pool = pblk->e_rq_pool; + pool = &pblk->e_rq_pool; break; default: pr_err("pblk: trying to free unknown rqd type\n"); return; } - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); mempool_free(rqd, pool); } @@ -292,7 +294,7 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, for (i = off; i < nr_pages + off; i++) { bv = bio->bi_io_vec[i]; - mempool_free(bv.bv_page, pblk->page_bio_pool); + mempool_free(bv.bv_page, &pblk->page_bio_pool); } } @@ -304,23 +306,23 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int i, ret; for (i = 0; i < nr_pages; i++) { - page = mempool_alloc(pblk->page_bio_pool, flags); + page = mempool_alloc(&pblk->page_bio_pool, flags); ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); if (ret != PBLK_EXPOSED_PAGE_SIZE) { pr_err("pblk: could not add page to bio\n"); - mempool_free(page, pblk->page_bio_pool); + mempool_free(page, &pblk->page_bio_pool); goto err; } } return 0; err: - pblk_bio_free_pages(pblk, bio, 0, i - 1); + pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i); return -1; } -static void pblk_write_kick(struct pblk *pblk) +void pblk_write_kick(struct pblk *pblk) { wake_up_process(pblk->writer_ts); mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000)); @@ -342,13 +344,6 @@ void pblk_write_should_kick(struct pblk *pblk) pblk_write_kick(pblk); } -void pblk_end_io_sync(struct nvm_rq *rqd) -{ - struct completion *waiting = rqd->private; - - complete(waiting); -} - static void pblk_wait_for_meta(struct pblk *pblk) { do { @@ -380,7 +375,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) lockdep_assert_held(&line->lock); - if (!vsc) { + if (line->w_err_gc->has_write_err) { + if (line->gc_group != PBLK_LINEGC_WERR) { + line->gc_group = PBLK_LINEGC_WERR; + move_list = &l_mg->gc_werr_list; + pblk_rl_werr_line_in(&pblk->rl); + } + } else if (!vsc) { if (line->gc_group != PBLK_LINEGC_FULL) { line->gc_group = PBLK_LINEGC_FULL; move_list = &l_mg->gc_full_list; @@ -467,16 +468,13 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; -#ifdef CONFIG_NVM_DEBUG - int ret; + atomic_inc(&pblk->inflight_io); - ret = pblk_check_io(pblk, rqd); - if (ret) - return ret; +#ifdef CONFIG_NVM_DEBUG + if (pblk_check_io(pblk, rqd)) + return NVM_IO_ERR; #endif - atomic_inc(&pblk->inflight_io); - return nvm_submit_io(dev, rqd); } @@ -484,16 +482,13 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; -#ifdef CONFIG_NVM_DEBUG - int ret; + atomic_inc(&pblk->inflight_io); - ret = pblk_check_io(pblk, rqd); - if (ret) - return ret; +#ifdef CONFIG_NVM_DEBUG + if (pblk_check_io(pblk, rqd)) + return NVM_IO_ERR; #endif - atomic_inc(&pblk->inflight_io); - return nvm_submit_io_sync(dev, rqd); } @@ -856,9 +851,10 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == PBLK_WRITE) + if (dir == PBLK_WRITE) { pblk_log_write_err(pblk, &rqd); - else if (dir == PBLK_READ) + ret = 1; + } else if (dir == PBLK_READ) pblk_log_read_err(pblk, &rqd); } @@ -1071,6 +1067,25 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, return 1; } +static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + + line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!line->map_bitmap) + return -ENOMEM; + + /* will be initialized using bb info from map_bitmap */ + line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!line->invalid_bitmap) { + kfree(line->map_bitmap); + line->map_bitmap = NULL; + return -ENOMEM; + } + + return 0; +} + /* For now lines are always assumed full lines. Thus, smeta former and current * lun bitmaps are omitted. */ @@ -1108,7 +1123,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { pr_debug("pblk: line smeta I/O failed. Retry\n"); - return 1; + return 0; } bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); @@ -1174,19 +1189,9 @@ static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line) static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_meta *lm = &pblk->lm; + int blk_in_line = atomic_read(&line->blk_in_line); int blk_to_erase; - line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC); - if (!line->map_bitmap) - return -ENOMEM; - - /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC); - if (!line->invalid_bitmap) { - kfree(line->map_bitmap); - return -ENOMEM; - } - /* Bad blocks do not need to be erased */ bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); @@ -1199,16 +1204,19 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) blk_to_erase = pblk_prepare_new_line(pblk, line); line->state = PBLK_LINESTATE_FREE; } else { - blk_to_erase = atomic_read(&line->blk_in_line); + blk_to_erase = blk_in_line; } - if (line->state != PBLK_LINESTATE_FREE) { - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); + if (blk_in_line < lm->min_blk_line) { spin_unlock(&line->lock); + return -EAGAIN; + } + + if (line->state != PBLK_LINESTATE_FREE) { WARN(1, "pblk: corrupted line %d, state %d\n", line->id, line->state); - return -EAGAIN; + spin_unlock(&line->lock); + return -EINTR; } line->state = PBLK_LINESTATE_OPEN; @@ -1241,13 +1249,16 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) } spin_unlock(&l_mg->free_lock); - pblk_rl_free_lines_dec(&pblk->rl, line, true); + ret = pblk_line_alloc_bitmaps(pblk, line); + if (ret) + return ret; if (!pblk_line_init_bb(pblk, line, 0)) { list_add(&line->list, &l_mg->free_list); return -EINTR; } + pblk_rl_free_lines_dec(&pblk->rl, line, true); return 0; } @@ -1259,6 +1270,24 @@ void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) line->emeta = NULL; } +static void pblk_line_reinit(struct pblk_line *line) +{ + *line->vsc = cpu_to_le32(EMPTY_ENTRY); + + line->map_bitmap = NULL; + line->invalid_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; +} + +void pblk_line_free(struct pblk_line *line) +{ + kfree(line->map_bitmap); + kfree(line->invalid_bitmap); + + pblk_line_reinit(line); +} + struct pblk_line *pblk_line_get(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1292,10 +1321,14 @@ retry: ret = pblk_line_prepare(pblk, line); if (ret) { - if (ret == -EAGAIN) { + switch (ret) { + case -EAGAIN: + list_add(&line->list, &l_mg->bad_list); + goto retry; + case -EINTR: list_add(&line->list, &l_mg->corrupt_list); goto retry; - } else { + default: pr_err("pblk: failed to prepare line %d\n", line->id); list_add(&line->list, &l_mg->free_list); l_mg->nr_free_lines++; @@ -1321,11 +1354,14 @@ retry: return NULL; } + retry_line->map_bitmap = line->map_bitmap; + retry_line->invalid_bitmap = line->invalid_bitmap; retry_line->smeta = line->smeta; retry_line->emeta = line->emeta; retry_line->meta_line = line->meta_line; - pblk_line_free(pblk, line); + pblk_line_reinit(line); + l_mg->data_line = retry_line; spin_unlock(&l_mg->free_lock); @@ -1378,6 +1414,9 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) } spin_unlock(&l_mg->free_lock); + if (pblk_line_alloc_bitmaps(pblk, line)) + return NULL; + if (pblk_line_erase(pblk, line)) { line = pblk_line_retry(pblk, line); if (!line) @@ -1449,7 +1488,7 @@ static void pblk_line_close_meta_sync(struct pblk *pblk) flush_workqueue(pblk->close_wq); } -void pblk_pipeline_stop(struct pblk *pblk) +void __pblk_pipeline_flush(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; int ret; @@ -1474,6 +1513,11 @@ void pblk_pipeline_stop(struct pblk *pblk) flush_workqueue(pblk->bb_wq); pblk_line_close_meta_sync(pblk); +} + +void __pblk_pipeline_stop(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; spin_lock(&l_mg->free_lock); pblk->state = PBLK_STATE_STOPPED; @@ -1482,6 +1526,12 @@ void pblk_pipeline_stop(struct pblk *pblk) spin_unlock(&l_mg->free_lock); } +void pblk_pipeline_stop(struct pblk *pblk) +{ + __pblk_pipeline_flush(pblk); + __pblk_pipeline_stop(pblk); +} + struct pblk_line *pblk_line_replace_data(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1511,6 +1561,9 @@ retry_erase: goto retry_erase; } + if (pblk_line_alloc_bitmaps(pblk, new)) + return NULL; + retry_setup: if (!pblk_line_init_metadata(pblk, new, cur)) { new = pblk_line_retry(pblk, new); @@ -1550,19 +1603,6 @@ out: return new; } -void pblk_line_free(struct pblk *pblk, struct pblk_line *line) -{ - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); - - *line->vsc = cpu_to_le32(EMPTY_ENTRY); - - line->map_bitmap = NULL; - line->invalid_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; -} - static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1572,9 +1612,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_FREE; line->gc_group = PBLK_LINEGC_NONE; - pblk_line_free(pblk, line); - spin_unlock(&line->lock); + pblk_line_free(line); + + if (line->w_err_gc->has_write_err) { + pblk_rl_werr_line_out(&pblk->rl); + line->w_err_gc->has_write_err = 0; + } + spin_unlock(&line->lock); atomic_dec(&gc->pipeline_gc); spin_lock(&l_mg->free_lock); @@ -1593,7 +1638,7 @@ static void pblk_line_put_ws(struct work_struct *work) struct pblk_line *line = line_put_ws->line; __pblk_line_put(pblk, line); - mempool_free(line_put_ws, pblk->gen_ws_pool); + mempool_free(line_put_ws, &pblk->gen_ws_pool); } void pblk_line_put(struct kref *ref) @@ -1610,7 +1655,7 @@ void pblk_line_put_wq(struct kref *ref) struct pblk *pblk = line->pblk; struct pblk_line_ws *line_put_ws; - line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC); + line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC); if (!line_put_ws) return; @@ -1737,11 +1782,34 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) spin_lock(&l_mg->close_lock); spin_lock(&line->lock); + + /* Update the in-memory start address for emeta, in case it has + * shifted due to write errors + */ + if (line->emeta_ssec != line->cur_sec) + line->emeta_ssec = line->cur_sec; + list_add_tail(&line->list, &l_mg->emeta_list); spin_unlock(&line->lock); spin_unlock(&l_mg->close_lock); pblk_line_should_sync_meta(pblk); + + +} + +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + unsigned int lba_list_size = lm->emeta_len[2]; + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; + struct pblk_emeta *emeta = line->emeta; + + w_err_gc->lba_list = pblk_malloc(lba_list_size, + l_mg->emeta_alloc_type, GFP_KERNEL); + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf), + lba_list_size); } void pblk_line_close_ws(struct work_struct *work) @@ -1750,9 +1818,16 @@ void pblk_line_close_ws(struct work_struct *work) ws); struct pblk *pblk = line_ws->pblk; struct pblk_line *line = line_ws->line; + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; + + /* Write errors makes the emeta start address stored in smeta invalid, + * so keep a copy of the lba list until we've gc'd the line + */ + if (w_err_gc->has_write_err) + pblk_save_lba_list(pblk, line); pblk_line_close(pblk, line); - mempool_free(line_ws, pblk->gen_ws_pool); + mempool_free(line_ws, &pblk->gen_ws_pool); } void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, @@ -1761,7 +1836,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, { struct pblk_line_ws *line_ws; - line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask); + line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask); line_ws->pblk = pblk; line_ws->line = line; diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 6851a5c67189..df88f1bdd921 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -129,6 +129,53 @@ out: kfree(gc_rq_ws); } +static __le64 *get_lba_list_from_emeta(struct pblk *pblk, + struct pblk_line *line) +{ + struct line_emeta *emeta_buf; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + unsigned int lba_list_size = lm->emeta_len[2]; + __le64 *lba_list; + int ret; + + emeta_buf = pblk_malloc(lm->emeta_len[0], + l_mg->emeta_alloc_type, GFP_KERNEL); + if (!emeta_buf) + return NULL; + + ret = pblk_line_read_emeta(pblk, line, emeta_buf); + if (ret) { + pr_err("pblk: line %d read emeta failed (%d)\n", + line->id, ret); + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + return NULL; + } + + /* If this read fails, it means that emeta is corrupted. + * For now, leave the line untouched. + * TODO: Implement a recovery routine that scans and moves + * all sectors on the line. + */ + + ret = pblk_recov_check_emeta(pblk, emeta_buf); + if (ret) { + pr_err("pblk: inconsistent emeta (line %d)\n", + line->id); + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + return NULL; + } + + lba_list = pblk_malloc(lba_list_size, + l_mg->emeta_alloc_type, GFP_KERNEL); + if (lba_list) + memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size); + + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + + return lba_list; +} + static void pblk_gc_line_prepare_ws(struct work_struct *work) { struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, @@ -138,46 +185,26 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct pblk_gc *gc = &pblk->gc; - struct line_emeta *emeta_buf; struct pblk_line_ws *gc_rq_ws; struct pblk_gc_rq *gc_rq; __le64 *lba_list; unsigned long *invalid_bitmap; int sec_left, nr_secs, bit; - int ret; invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); if (!invalid_bitmap) goto fail_free_ws; - emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, - GFP_KERNEL); - if (!emeta_buf) { - pr_err("pblk: cannot use GC emeta\n"); - goto fail_free_bitmap; - } - - ret = pblk_line_read_emeta(pblk, line, emeta_buf); - if (ret) { - pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret); - goto fail_free_emeta; - } - - /* If this read fails, it means that emeta is corrupted. For now, leave - * the line untouched. TODO: Implement a recovery routine that scans and - * moves all sectors on the line. - */ - - ret = pblk_recov_check_emeta(pblk, emeta_buf); - if (ret) { - pr_err("pblk: inconsistent emeta (line %d)\n", line->id); - goto fail_free_emeta; - } - - lba_list = emeta_to_lbas(pblk, emeta_buf); - if (!lba_list) { - pr_err("pblk: could not interpret emeta (line %d)\n", line->id); - goto fail_free_emeta; + if (line->w_err_gc->has_write_err) { + lba_list = line->w_err_gc->lba_list; + line->w_err_gc->lba_list = NULL; + } else { + lba_list = get_lba_list_from_emeta(pblk, line); + if (!lba_list) { + pr_err("pblk: could not interpret emeta (line %d)\n", + line->id); + goto fail_free_ws; + } } spin_lock(&line->lock); @@ -187,14 +214,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) if (sec_left < 0) { pr_err("pblk: corrupted GC line (%d)\n", line->id); - goto fail_free_emeta; + goto fail_free_lba_list; } bit = -1; next_rq: gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL); if (!gc_rq) - goto fail_free_emeta; + goto fail_free_lba_list; nr_secs = 0; do { @@ -240,7 +267,7 @@ next_rq: goto next_rq; out: - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + pblk_mfree(lba_list, l_mg->emeta_alloc_type); kfree(line_ws); kfree(invalid_bitmap); @@ -251,9 +278,8 @@ out: fail_free_gc_rq: kfree(gc_rq); -fail_free_emeta: - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); -fail_free_bitmap: +fail_free_lba_list: + pblk_mfree(lba_list, l_mg->emeta_alloc_type); kfree(invalid_bitmap); fail_free_ws: kfree(line_ws); @@ -349,12 +375,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) { unsigned int nr_blocks_free, nr_blocks_need; + unsigned int werr_lines = atomic_read(&rl->werr_lines); nr_blocks_need = pblk_rl_high_thrs(rl); nr_blocks_free = pblk_rl_nr_free_blks(rl); /* This is not critical, no need to take lock here */ - return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)); + return ((werr_lines > 0) || + ((gc->gc_active) && (nr_blocks_need > nr_blocks_free))); } void pblk_gc_free_full_lines(struct pblk *pblk) @@ -649,7 +677,7 @@ fail_free_main_kthread: return ret; } -void pblk_gc_exit(struct pblk *pblk) +void pblk_gc_exit(struct pblk *pblk, bool graceful) { struct pblk_gc *gc = &pblk->gc; @@ -663,10 +691,12 @@ void pblk_gc_exit(struct pblk *pblk) if (gc->gc_reader_ts) kthread_stop(gc->gc_reader_ts); - flush_workqueue(gc->gc_reader_wq); - destroy_workqueue(gc->gc_reader_wq); + if (graceful) { + flush_workqueue(gc->gc_reader_wq); + flush_workqueue(gc->gc_line_reader_wq); + } - flush_workqueue(gc->gc_line_reader_wq); + destroy_workqueue(gc->gc_reader_wq); destroy_workqueue(gc->gc_line_reader_wq); if (gc->gc_writer_ts) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 91a5bc2556a3..ce561f5d48ce 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -20,10 +20,15 @@ #include "pblk.h" +unsigned int write_buffer_size; + +module_param(write_buffer_size, uint, 0644); +MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); + static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, *pblk_w_rq_cache; static DECLARE_RWSEM(pblk_lock); -struct bio_set *pblk_bio_set; +struct bio_set pblk_bio_set; static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, struct bio *bio) @@ -127,10 +132,8 @@ static int pblk_l2p_recover(struct pblk *pblk, bool factory_init) if (!line) { /* Configure next line for user data */ line = pblk_line_get_first_data(pblk); - if (!line) { - pr_err("pblk: line list corrupted\n"); + if (!line) return -EFAULT; - } } return 0; @@ -141,6 +144,7 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init) sector_t i; struct ppa_addr ppa; size_t map_size; + int ret = 0; map_size = pblk_trans_map_size(pblk); pblk->trans_map = vmalloc(map_size); @@ -152,7 +156,11 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init) for (i = 0; i < pblk->rl.nr_secs; i++) pblk_trans_map_set(pblk, i, ppa); - return pblk_l2p_recover(pblk, factory_init); + ret = pblk_l2p_recover(pblk, factory_init); + if (ret) + vfree(pblk->trans_map); + + return ret; } static void pblk_rwb_free(struct pblk *pblk) @@ -169,10 +177,15 @@ static int pblk_rwb_init(struct pblk *pblk) struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_rb_entry *entries; - unsigned long nr_entries; + unsigned long nr_entries, buffer_size; unsigned int power_size, power_seg_sz; - nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer); + if (write_buffer_size && (write_buffer_size > pblk->pgs_in_buffer)) + buffer_size = write_buffer_size; + else + buffer_size = pblk->pgs_in_buffer; + + nr_entries = pblk_rb_calculate_size(buffer_size); entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry)); if (!entries) @@ -341,7 +354,7 @@ static int pblk_core_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int max_write_ppas; + int ret, max_write_ppas; atomic64_set(&pblk->user_wa, 0); atomic64_set(&pblk->pad_wa, 0); @@ -375,33 +388,33 @@ static int pblk_core_init(struct pblk *pblk) goto fail_free_pad_dist; /* Internal bios can be at most the sectors signaled by the device. */ - pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0); - if (!pblk->page_bio_pool) + ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0); + if (ret) goto free_global_caches; - pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, - pblk_ws_cache); - if (!pblk->gen_ws_pool) + ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, + pblk_ws_cache); + if (ret) goto free_page_bio_pool; - pblk->rec_pool = mempool_create_slab_pool(geo->all_luns, - pblk_rec_cache); - if (!pblk->rec_pool) + ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, + pblk_rec_cache); + if (ret) goto free_gen_ws_pool; - pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns, - pblk_g_rq_cache); - if (!pblk->r_rq_pool) + ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, + pblk_g_rq_cache); + if (ret) goto free_rec_pool; - pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns, - pblk_g_rq_cache); - if (!pblk->e_rq_pool) + ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, + pblk_g_rq_cache); + if (ret) goto free_r_rq_pool; - pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns, - pblk_w_rq_cache); - if (!pblk->w_rq_pool) + ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, + pblk_w_rq_cache); + if (ret) goto free_e_rq_pool; pblk->close_wq = alloc_workqueue("pblk-close-wq", @@ -423,6 +436,7 @@ static int pblk_core_init(struct pblk *pblk) goto free_r_end_wq; INIT_LIST_HEAD(&pblk->compl_list); + INIT_LIST_HEAD(&pblk->resubmit_list); return 0; @@ -433,17 +447,17 @@ free_bb_wq: free_close_wq: destroy_workqueue(pblk->close_wq); free_w_rq_pool: - mempool_destroy(pblk->w_rq_pool); + mempool_exit(&pblk->w_rq_pool); free_e_rq_pool: - mempool_destroy(pblk->e_rq_pool); + mempool_exit(&pblk->e_rq_pool); free_r_rq_pool: - mempool_destroy(pblk->r_rq_pool); + mempool_exit(&pblk->r_rq_pool); free_rec_pool: - mempool_destroy(pblk->rec_pool); + mempool_exit(&pblk->rec_pool); free_gen_ws_pool: - mempool_destroy(pblk->gen_ws_pool); + mempool_exit(&pblk->gen_ws_pool); free_page_bio_pool: - mempool_destroy(pblk->page_bio_pool); + mempool_exit(&pblk->page_bio_pool); free_global_caches: pblk_free_global_caches(pblk); fail_free_pad_dist: @@ -462,12 +476,12 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); - mempool_destroy(pblk->page_bio_pool); - mempool_destroy(pblk->gen_ws_pool); - mempool_destroy(pblk->rec_pool); - mempool_destroy(pblk->r_rq_pool); - mempool_destroy(pblk->e_rq_pool); - mempool_destroy(pblk->w_rq_pool); + mempool_exit(&pblk->page_bio_pool); + mempool_exit(&pblk->gen_ws_pool); + mempool_exit(&pblk->rec_pool); + mempool_exit(&pblk->r_rq_pool); + mempool_exit(&pblk->e_rq_pool); + mempool_exit(&pblk->w_rq_pool); pblk_free_global_caches(pblk); kfree(pblk->pad_dist); @@ -489,11 +503,17 @@ static void pblk_line_mg_free(struct pblk *pblk) } } -static void pblk_line_meta_free(struct pblk_line *line) +static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, + struct pblk_line *line) { + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; + kfree(line->blk_bitmap); kfree(line->erase_bitmap); kfree(line->chks); + + pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type); + kfree(w_err_gc); } static void pblk_lines_free(struct pblk *pblk) @@ -506,8 +526,8 @@ static void pblk_lines_free(struct pblk *pblk) for (i = 0; i < l_mg->nr_lines; i++) { line = &pblk->lines[i]; - pblk_line_free(pblk, line); - pblk_line_meta_free(line); + pblk_line_free(line); + pblk_line_meta_free(l_mg, line); } spin_unlock(&l_mg->free_lock); @@ -748,14 +768,14 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, chunk->cnlb = chunk_meta->cnlb; chunk->wp = chunk_meta->wp; - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - if (chunk->type & NVM_CHK_TP_SZ_SPEC) { WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); continue; } + if (!(chunk->state & NVM_CHK_ST_OFFLINE)) + continue; + set_bit(pos, line->blk_bitmap); nr_bad_chks++; } @@ -809,20 +829,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line) return -ENOMEM; line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->erase_bitmap) { - kfree(line->blk_bitmap); - return -ENOMEM; - } + if (!line->erase_bitmap) + goto free_blk_bitmap; + line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta), GFP_KERNEL); - if (!line->chks) { - kfree(line->erase_bitmap); - kfree(line->blk_bitmap); - return -ENOMEM; - } + if (!line->chks) + goto free_erase_bitmap; + + line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL); + if (!line->w_err_gc) + goto free_chks; return 0; + +free_chks: + kfree(line->chks); +free_erase_bitmap: + kfree(line->erase_bitmap); +free_blk_bitmap: + kfree(line->blk_bitmap); + return -ENOMEM; } static int pblk_line_mg_init(struct pblk *pblk) @@ -847,12 +875,14 @@ static int pblk_line_mg_init(struct pblk *pblk) INIT_LIST_HEAD(&l_mg->gc_mid_list); INIT_LIST_HEAD(&l_mg->gc_low_list); INIT_LIST_HEAD(&l_mg->gc_empty_list); + INIT_LIST_HEAD(&l_mg->gc_werr_list); INIT_LIST_HEAD(&l_mg->emeta_list); - l_mg->gc_lists[0] = &l_mg->gc_high_list; - l_mg->gc_lists[1] = &l_mg->gc_mid_list; - l_mg->gc_lists[2] = &l_mg->gc_low_list; + l_mg->gc_lists[0] = &l_mg->gc_werr_list; + l_mg->gc_lists[1] = &l_mg->gc_high_list; + l_mg->gc_lists[2] = &l_mg->gc_mid_list; + l_mg->gc_lists[3] = &l_mg->gc_low_list; spin_lock_init(&l_mg->free_lock); spin_lock_init(&l_mg->close_lock); @@ -1047,6 +1077,11 @@ static int pblk_lines_init(struct pblk *pblk) nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); } + if (!nr_free_chks) { + pr_err("pblk: too many bad blocks prevent for sane instance\n"); + return -EINTR; + } + pblk_set_provision(pblk, nr_free_chks); kfree(chunk_meta); @@ -1054,7 +1089,7 @@ static int pblk_lines_init(struct pblk *pblk) fail_free_lines: while (--i >= 0) - pblk_line_meta_free(&pblk->lines[i]); + pblk_line_meta_free(l_mg, &pblk->lines[i]); kfree(pblk->lines); fail_free_chunk_meta: kfree(chunk_meta); @@ -1110,23 +1145,25 @@ static void pblk_free(struct pblk *pblk) kfree(pblk); } -static void pblk_tear_down(struct pblk *pblk) +static void pblk_tear_down(struct pblk *pblk, bool graceful) { - pblk_pipeline_stop(pblk); + if (graceful) + __pblk_pipeline_flush(pblk); + __pblk_pipeline_stop(pblk); pblk_writer_stop(pblk); pblk_rb_sync_l2p(&pblk->rwb); pblk_rl_free(&pblk->rl); - pr_debug("pblk: consistent tear down\n"); + pr_debug("pblk: consistent tear down (graceful:%d)\n", graceful); } -static void pblk_exit(void *private) +static void pblk_exit(void *private, bool graceful) { struct pblk *pblk = private; down_write(&pblk_lock); - pblk_gc_exit(pblk); - pblk_tear_down(pblk); + pblk_gc_exit(pblk, graceful); + pblk_tear_down(pblk, graceful); #ifdef CONFIG_NVM_DEBUG pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); @@ -1175,6 +1212,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->state = PBLK_STATE_RUNNING; pblk->gc.gc_enabled = 0; + spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); @@ -1297,18 +1335,18 @@ static int __init pblk_module_init(void) { int ret; - pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!pblk_bio_set) - return -ENOMEM; + ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) + return ret; ret = nvm_register_tgt_type(&tt_pblk); if (ret) - bioset_free(pblk_bio_set); + bioset_exit(&pblk_bio_set); return ret; } static void pblk_module_exit(void) { - bioset_free(pblk_bio_set); + bioset_exit(&pblk_bio_set); nvm_unregister_tgt_type(&tt_pblk); } diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 20dbaa89c9df..953ca31dda68 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -18,11 +18,11 @@ #include "pblk.h" -static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, - struct ppa_addr *ppa_list, - unsigned long *lun_bitmap, - struct pblk_sec_meta *meta_list, - unsigned int valid_secs) +static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, + struct ppa_addr *ppa_list, + unsigned long *lun_bitmap, + struct pblk_sec_meta *meta_list, + unsigned int valid_secs) { struct pblk_line *line = pblk_line_get_data(pblk); struct pblk_emeta *emeta; @@ -35,8 +35,14 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, if (pblk_line_is_full(line)) { struct pblk_line *prev_line = line; + /* If we cannot allocate a new line, make sure to store metadata + * on current line and then fail + */ line = pblk_line_replace_data(pblk); pblk_line_close_meta(pblk, prev_line); + + if (!line) + return -EINTR; } emeta = line->emeta; @@ -74,6 +80,7 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, } pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); + return 0; } void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, @@ -87,8 +94,12 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], - lun_bitmap, &meta_list[i], map_secs); + if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs)) { + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); + pblk_pipeline_stop(pblk); + } } } @@ -108,8 +119,12 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], - lun_bitmap, &meta_list[i], map_secs); + if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs)) { + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); + pblk_pipeline_stop(pblk); + } erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 52fdd85dbc97..00cd1f20a196 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -142,10 +142,9 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx) { int flags; -try: flags = READ_ONCE(w_ctx->flags); - if (!(flags & PBLK_SUBMITTED_ENTRY)) - goto try; + WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY), + "pblk: overwriting unsubmitted data\n"); /* Release flags on context. Protect from writes and reads */ smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); @@ -350,7 +349,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, } static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, - unsigned int pos) + unsigned int pos) { struct pblk_rb_entry *entry; unsigned int sync, flush_point; @@ -420,7 +419,7 @@ void pblk_rb_flush(struct pblk_rb *rb) if (pblk_rb_flush_point_set(rb, NULL, mem)) return; - pblk_write_should_kick(pblk); + pblk_write_kick(pblk); } static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, @@ -504,45 +503,6 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, } /* - * The caller of this function must ensure that the backpointer will not - * overwrite the entries passed on the list. - */ -unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, - struct list_head *list, - unsigned int max) -{ - struct pblk_rb_entry *entry, *tentry; - struct page *page; - unsigned int read = 0; - int ret; - - list_for_each_entry_safe(entry, tentry, list, index) { - if (read > max) { - pr_err("pblk: too many entries on list\n"); - goto out; - } - - page = virt_to_page(entry->data); - if (!page) { - pr_err("pblk: could not allocate write bio page\n"); - goto out; - } - - ret = bio_add_page(bio, page, rb->seg_size, 0); - if (ret != rb->seg_size) { - pr_err("pblk: could not add page to write bio\n"); - goto out; - } - - list_del(&entry->index); - read++; - } - -out: - return read; -} - -/* * Read available entries on rb and add them to the given bio. To avoid a memory * copy, a page reference to the write buffer is used to be added to the bio. * diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 9eee10f69df0..18694694e5f0 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -39,10 +39,10 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, } static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, - sector_t blba, unsigned long *read_bitmap) + struct bio *bio, sector_t blba, + unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct bio *bio = rqd->bio; struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; @@ -102,32 +102,69 @@ next: #endif } -static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) + +static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, + sector_t blba) { - int err; + struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + int nr_lbas = rqd->nr_ppas; + int i; - err = pblk_submit_io(pblk, rqd); - if (err) - return NVM_IO_ERR; + for (i = 0; i < nr_lbas; i++) { + u64 lba = le64_to_cpu(meta_lba_list[i].lba); + + if (lba == ADDR_EMPTY) + continue; + + if (lba != blba + i) { +#ifdef CONFIG_NVM_DEBUG + struct ppa_addr *p; - return NVM_IO_OK; + p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr; + print_ppa(&pblk->dev->geo, p, "seq", i); +#endif + pr_err("pblk: corrupted read LBA (%llu/%llu)\n", + lba, (u64)blba + i); + WARN_ON(1); + } + } } -static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, - sector_t blba) +/* + * There can be holes in the lba list. + */ +static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, + u64 *lba_list, int nr_lbas) { - struct pblk_sec_meta *meta_list = rqd->meta_list; - int nr_lbas = rqd->nr_ppas; - int i; + struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + int i, j; - for (i = 0; i < nr_lbas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); + for (i = 0, j = 0; i < nr_lbas; i++) { + u64 lba = lba_list[i]; + u64 meta_lba; if (lba == ADDR_EMPTY) continue; - WARN(lba != blba + i, "pblk: corrupted read LBA\n"); + meta_lba = le64_to_cpu(meta_lba_list[j].lba); + + if (lba != meta_lba) { +#ifdef CONFIG_NVM_DEBUG + struct ppa_addr *p; + int nr_ppas = rqd->nr_ppas; + + p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr; + print_ppa(&pblk->dev->geo, p, "seq", j); +#endif + pr_err("pblk: corrupted read LBA (%llu/%llu)\n", + lba, meta_lba); + WARN_ON(1); + } + + j++; } + + WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); } static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) @@ -152,7 +189,6 @@ static void pblk_end_user_read(struct bio *bio) WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); #endif bio_endio(bio); - bio_put(bio); } static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, @@ -160,23 +196,18 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = rqd->bio; + struct bio *int_bio = rqd->bio; unsigned long start_time = r_ctx->start_time; generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time); if (rqd->error) pblk_log_read_err(pblk, rqd); -#ifdef CONFIG_NVM_DEBUG - else - WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); -#endif - pblk_read_check(pblk, rqd, r_ctx->lba); + pblk_read_check_seq(pblk, rqd, r_ctx->lba); - bio_put(bio); - if (r_ctx->private) - pblk_end_user_read((struct bio *)r_ctx->private); + if (int_bio) + bio_put(int_bio); if (put_line) pblk_read_put_rqd_kref(pblk, rqd); @@ -193,16 +224,19 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, static void pblk_end_io_read(struct nvm_rq *rqd) { struct pblk *pblk = rqd->private; + struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); + struct bio *bio = (struct bio *)r_ctx->private; + pblk_end_user_read(bio); __pblk_end_io_read(pblk, rqd, true); } -static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int bio_init_idx, - unsigned long *read_bitmap) +static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd, + struct bio *orig_bio, unsigned int bio_init_idx, + unsigned long *read_bitmap) { - struct bio *new_bio, *bio = rqd->bio; struct pblk_sec_meta *meta_list = rqd->meta_list; + struct bio *new_bio; struct bio_vec src_bv, dst_bv; void *ppa_ptr = NULL; void *src_p, *dst_p; @@ -219,11 +253,11 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, new_bio = bio_alloc(GFP_KERNEL, nr_holes); if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) - goto err; + goto fail_add_pages; if (nr_holes != new_bio->bi_vcnt) { pr_err("pblk: malformed bio\n"); - goto err; + goto fail; } for (i = 0; i < nr_secs; i++) @@ -246,7 +280,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, if (ret) { bio_put(rqd->bio); pr_err("pblk: sync read IO submission failed\n"); - goto err; + goto fail; } if (rqd->error) { @@ -282,7 +316,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, meta_list[hole].lba = lba_list_media[i]; src_bv = new_bio->bi_io_vec[i++]; - dst_bv = bio->bi_io_vec[bio_init_idx + hole]; + dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole]; src_p = kmap_atomic(src_bv.bv_page); dst_p = kmap_atomic(dst_bv.bv_page); @@ -294,35 +328,33 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, kunmap_atomic(src_p); kunmap_atomic(dst_p); - mempool_free(src_bv.bv_page, pblk->page_bio_pool); + mempool_free(src_bv.bv_page, &pblk->page_bio_pool); hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); } while (hole < nr_secs); bio_put(new_bio); - /* Complete the original bio and associated request */ - bio_endio(bio); - rqd->bio = bio; + /* restore original request */ + rqd->bio = NULL; rqd->nr_ppas = nr_secs; __pblk_end_io_read(pblk, rqd, false); - return NVM_IO_OK; - -err: - pr_err("pblk: failed to perform partial read\n"); + return NVM_IO_DONE; +fail: /* Free allocated pages in new bio */ - pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); + pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt); +fail_add_pages: + pr_err("pblk: failed to perform partial read\n"); __pblk_end_io_read(pblk, rqd, false); return NVM_IO_ERR; } -static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, +static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t lba, unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct bio *bio = rqd->bio; struct ppa_addr ppa; pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); @@ -386,14 +418,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd = pblk_alloc_rqd(pblk, PBLK_READ); rqd->opcode = NVM_OP_PREAD; - rqd->bio = bio; rqd->nr_ppas = nr_secs; + rqd->bio = NULL; /* cloned bio if needed */ rqd->private = pblk; rqd->end_io = pblk_end_io_read; r_ctx = nvm_rq_to_pdu(rqd); r_ctx->start_time = jiffies; r_ctx->lba = blba; + r_ctx->private = bio; /* original bio */ /* Save the index for this bio's start. This is needed in case * we need to fill a partial read. @@ -411,17 +444,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap); + pblk_read_ppalist_rq(pblk, rqd, bio, blba, &read_bitmap); } else { - pblk_read_rq(pblk, rqd, blba, &read_bitmap); + pblk_read_rq(pblk, rqd, bio, blba, &read_bitmap); } - bio_get(bio); if (bitmap_full(&read_bitmap, nr_secs)) { - bio_endio(bio); atomic_inc(&pblk->inflight_io); __pblk_end_io_read(pblk, rqd, false); - return NVM_IO_OK; + return NVM_IO_DONE; } /* All sectors are to be read from the device */ @@ -429,20 +460,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) struct bio *int_bio = NULL; /* Clone read bio to deal with read errors internally */ - int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set); + int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); if (!int_bio) { pr_err("pblk: could not clone read bio\n"); goto fail_end_io; } rqd->bio = int_bio; - r_ctx->private = bio; - ret = pblk_submit_read_io(pblk, rqd); - if (ret) { + if (pblk_submit_io(pblk, rqd)) { pr_err("pblk: read IO submission failed\n"); - if (int_bio) - bio_put(int_bio); + ret = NVM_IO_ERR; goto fail_end_io; } @@ -452,7 +480,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) /* The read bio request could be partially filled by the write buffer, * but there are some holes that need to be read from the drive. */ - return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); + return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap); fail_rqd_free: pblk_free_rqd(pblk, rqd, PBLK_READ); @@ -585,6 +613,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) goto err_free_bio; } + pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs); + atomic_dec(&pblk->inflight_io); if (rqd.error) { diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 3e079c2afa6e..598342833d0d 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -16,97 +16,6 @@ #include "pblk.h" -void pblk_submit_rec(struct work_struct *work) -{ - struct pblk_rec_ctx *recovery = - container_of(work, struct pblk_rec_ctx, ws_rec); - struct pblk *pblk = recovery->pblk; - struct nvm_rq *rqd = recovery->rqd; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio; - unsigned int nr_rec_secs; - unsigned int pgs_read; - int ret; - - nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, - NVM_MAX_VLBA); - - bio = bio_alloc(GFP_KERNEL, nr_rec_secs); - - bio->bi_iter.bi_sector = 0; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - rqd->bio = bio; - rqd->nr_ppas = nr_rec_secs; - - pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed, - nr_rec_secs); - if (pgs_read != nr_rec_secs) { - pr_err("pblk: could not read recovery entries\n"); - goto err; - } - - if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) { - pr_err("pblk: could not setup recovery request\n"); - goto err; - } - -#ifdef CONFIG_NVM_DEBUG - atomic_long_add(nr_rec_secs, &pblk->recov_writes); -#endif - - ret = pblk_submit_io(pblk, rqd); - if (ret) { - pr_err("pblk: I/O submission failed: %d\n", ret); - goto err; - } - - mempool_free(recovery, pblk->rec_pool); - return; - -err: - bio_put(bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); -} - -int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, - struct pblk_rec_ctx *recovery, u64 *comp_bits, - unsigned int comp) -{ - struct nvm_rq *rec_rqd; - struct pblk_c_ctx *rec_ctx; - int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; - - rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); - rec_ctx = nvm_rq_to_pdu(rec_rqd); - - /* Copy completion bitmap, but exclude the first X completed entries */ - bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, - (unsigned long int *)comp_bits, - comp, NVM_MAX_VLBA); - - /* Save the context for the entries that need to be re-written and - * update current context with the completed entries. - */ - rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp); - if (comp >= c_ctx->nr_valid) { - rec_ctx->nr_valid = 0; - rec_ctx->nr_padded = nr_entries - comp; - - c_ctx->nr_padded = comp - c_ctx->nr_valid; - } else { - rec_ctx->nr_valid = c_ctx->nr_valid - comp; - rec_ctx->nr_padded = c_ctx->nr_padded; - - c_ctx->nr_valid = comp; - c_ctx->nr_padded = 0; - } - - recovery->rqd = rec_rqd; - recovery->pblk = pblk; - - return 0; -} - int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) { u32 crc; @@ -865,18 +774,30 @@ static void pblk_recov_wa_counters(struct pblk *pblk, } static int pblk_line_was_written(struct pblk_line *line, - struct pblk_line_meta *lm) + struct pblk *pblk) { - int i; - int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE; + struct pblk_line_meta *lm = &pblk->lm; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_chk_meta *chunk; + struct ppa_addr bppa; + int smeta_blk; - for (i = 0; i < lm->blk_per_line; i++) { - if (!(line->chks[i].state & state_mask)) - return 1; - } + if (line->state == PBLK_LINESTATE_BAD) + return 0; - return 0; + smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (smeta_blk >= lm->blk_per_line) + return 0; + + bppa = pblk->luns[smeta_blk].bppa; + chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; + + if (chunk->state & NVM_CHK_ST_FREE) + return 0; + + return 1; } struct pblk_line *pblk_recov_l2p(struct pblk *pblk) @@ -915,7 +836,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta); - if (!pblk_line_was_written(line, lm)) + if (!pblk_line_was_written(line, pblk)) continue; /* Lines that cannot be read are assumed as not written here */ diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 883a7113b19d..6a0616a6fcaf 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) pblk_rl_kick_u_timer(rl); } +void pblk_rl_werr_line_in(struct pblk_rl *rl) +{ + atomic_inc(&rl->werr_lines); +} + +void pblk_rl_werr_line_out(struct pblk_rl *rl) +{ + atomic_dec(&rl->werr_lines); +} + void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) { atomic_add(nr_entries, &rl->rb_gc_cnt); @@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, { struct pblk *pblk = container_of(rl, struct pblk, rl); int max = rl->rb_budget; + int werr_gc_needed = atomic_read(&rl->werr_lines); if (free_blocks >= rl->high) { - rl->rb_user_max = max; - rl->rb_gc_max = 0; - rl->rb_state = PBLK_RL_HIGH; + if (werr_gc_needed) { + /* Allocate a small budget for recovering + * lines with write errors + */ + rl->rb_gc_max = 1 << rl->rb_windows_pw; + rl->rb_user_max = max - rl->rb_gc_max; + rl->rb_state = PBLK_RL_WERR; + } else { + rl->rb_user_max = max; + rl->rb_gc_max = 0; + rl->rb_state = PBLK_RL_OFF; + } } else if (free_blocks < rl->high) { int shift = rl->high_pw - rl->rb_windows_pw; int user_windows = free_blocks >> shift; @@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, rl->rb_state = PBLK_RL_LOW; } - if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW)) + if (rl->rb_state != PBLK_RL_OFF) pblk_gc_should_start(pblk); else pblk_gc_should_stop(pblk); @@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) atomic_set(&rl->rb_user_cnt, 0); atomic_set(&rl->rb_gc_cnt, 0); atomic_set(&rl->rb_space, -1); + atomic_set(&rl->werr_lines, 0); timer_setup(&rl->u_timer, pblk_rl_u_timer, 0); diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index e61909af23a5..88a0a7c407aa 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0; int d_line_cnt = 0, l_line_cnt = 0; int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; + int gc_werr = 0; + int bad = 0, cor = 0; int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; int map_weight = 0, meta_weight = 0; @@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) gc_empty++; } + list_for_each_entry(line, &l_mg->gc_werr_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_werr++; + } + list_for_each_entry(line, &l_mg->bad_list, list) bad++; list_for_each_entry(line, &l_mg->corrupt_list, list) @@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) l_mg->nr_lines); sz += snprintf(page + sz, PAGE_SIZE - sz, - "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", - gc_full, gc_high, gc_mid, gc_low, gc_empty, + "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", + gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, atomic_read(&pblk->gc.read_inflight_gc)); sz += snprintf(page + sz, PAGE_SIZE - sz, diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 3e6f1ebd743a..f353e52941f5 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -103,68 +103,150 @@ retry: pblk_rb_sync_end(&pblk->rwb, &flags); } -/* When a write fails, we are not sure whether the block has grown bad or a page - * range is more susceptible to write errors. If a high number of pages fail, we - * assume that the block is bad and we mark it accordingly. In all cases, we - * remap and resubmit the failed entries as fast as possible; if a flush is - * waiting on a completion, the whole stack would stall otherwise. - */ -static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) +/* Map remaining sectors in chunk, starting from ppa */ +static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) { - void *comp_bits = &rqd->ppa_status; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct pblk_rec_ctx *recovery; - struct ppa_addr *ppa_list = rqd->ppa_list; - int nr_ppas = rqd->nr_ppas; - unsigned int c_entries; - int bit, ret; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line; + struct ppa_addr map_ppa = *ppa; + u64 paddr; + int done = 0; - if (unlikely(nr_ppas == 1)) - ppa_list = &rqd->ppa_addr; + line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + spin_lock(&line->lock); - recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC); + while (!done) { + paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa); - INIT_LIST_HEAD(&recovery->failed); + if (!test_and_set_bit(paddr, line->map_bitmap)) + line->left_msecs--; - bit = -1; - while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) { - struct pblk_rb_entry *entry; - struct ppa_addr ppa; + if (!test_and_set_bit(paddr, line->invalid_bitmap)) + le32_add_cpu(line->vsc, -1); - /* Logic error */ - if (bit > c_ctx->nr_valid) { - WARN_ONCE(1, "pblk: corrupted write request\n"); - mempool_free(recovery, pblk->rec_pool); - goto out; + if (geo->version == NVM_OCSSD_SPEC_12) { + map_ppa.ppa++; + if (map_ppa.g.pg == geo->num_pg) + done = 1; + } else { + map_ppa.m.sec++; + if (map_ppa.m.sec == geo->clba) + done = 1; } + } - ppa = ppa_list[bit]; - entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa); - if (!entry) { - pr_err("pblk: could not scan entry on write failure\n"); - mempool_free(recovery, pblk->rec_pool); - goto out; - } + line->w_err_gc->has_write_err = 1; + spin_unlock(&line->lock); +} - /* The list is filled first and emptied afterwards. No need for - * protecting it with a lock +static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, + unsigned int nr_entries) +{ + struct pblk_rb *rb = &pblk->rwb; + struct pblk_rb_entry *entry; + struct pblk_line *line; + struct pblk_w_ctx *w_ctx; + struct ppa_addr ppa_l2p; + int flags; + unsigned int pos, i; + + spin_lock(&pblk->trans_lock); + pos = sentry; + for (i = 0; i < nr_entries; i++) { + entry = &rb->entries[pos]; + w_ctx = &entry->w_ctx; + + /* Check if the lba has been overwritten */ + ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); + if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) + w_ctx->lba = ADDR_EMPTY; + + /* Mark up the entry as submittable again */ + flags = READ_ONCE(w_ctx->flags); + flags |= PBLK_WRITTEN_DATA; + /* Release flags on write context. Protect from writes */ + smp_store_release(&w_ctx->flags, flags); + + /* Decrese the reference count to the line as we will + * re-map these entries */ - list_add_tail(&entry->index, &recovery->failed); + line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + kref_put(&line->ref, pblk_line_put); + + pos = (pos + 1) & (rb->nr_entries - 1); } + spin_unlock(&pblk->trans_lock); +} - c_entries = find_first_bit(comp_bits, nr_ppas); - ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries); - if (ret) { - pr_err("pblk: could not recover from write failure\n"); - mempool_free(recovery, pblk->rec_pool); - goto out; +static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx) +{ + struct pblk_c_ctx *r_ctx; + + r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL); + if (!r_ctx) + return; + + r_ctx->lun_bitmap = NULL; + r_ctx->sentry = c_ctx->sentry; + r_ctx->nr_valid = c_ctx->nr_valid; + r_ctx->nr_padded = c_ctx->nr_padded; + + spin_lock(&pblk->resubmit_lock); + list_add_tail(&r_ctx->list, &pblk->resubmit_list); + spin_unlock(&pblk->resubmit_lock); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes); +#endif +} + +static void pblk_submit_rec(struct work_struct *work) +{ + struct pblk_rec_ctx *recovery = + container_of(work, struct pblk_rec_ctx, ws_rec); + struct pblk *pblk = recovery->pblk; + struct nvm_rq *rqd = recovery->rqd; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + struct ppa_addr *ppa_list; + + pblk_log_write_err(pblk, rqd); + + if (rqd->nr_ppas == 1) + ppa_list = &rqd->ppa_addr; + else + ppa_list = rqd->ppa_list; + + pblk_map_remaining(pblk, ppa_list); + pblk_queue_resubmit(pblk, c_ctx); + + pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, + c_ctx->nr_padded); + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); + mempool_free(recovery, &pblk->rec_pool); + + atomic_dec(&pblk->inflight_io); +} + + +static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct pblk_rec_ctx *recovery; + + recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC); + if (!recovery) { + pr_err("pblk: could not allocate recovery work\n"); + return; } + recovery->pblk = pblk; + recovery->rqd = rqd; + INIT_WORK(&recovery->ws_rec, pblk_submit_rec); queue_work(pblk->close_wq, &recovery->ws_rec); - -out: - pblk_complete_write(pblk, rqd, c_ctx); } static void pblk_end_io_write(struct nvm_rq *rqd) @@ -173,8 +255,8 @@ static void pblk_end_io_write(struct nvm_rq *rqd) struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); if (rqd->error) { - pblk_log_write_err(pblk, rqd); - return pblk_end_w_fail(pblk, rqd); + pblk_end_w_fail(pblk, rqd); + return; } #ifdef CONFIG_NVM_DEBUG else @@ -198,6 +280,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) if (rqd->error) { pblk_log_write_err(pblk, rqd); pr_err("pblk: metadata I/O failed. Line %d\n", line->id); + line->w_err_gc->has_write_err = 1; } sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); @@ -266,31 +349,6 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, return 0; } -int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - struct pblk_line_meta *lm = &pblk->lm; - unsigned long *lun_bitmap; - int ret; - - lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); - if (!lun_bitmap) - return -ENOMEM; - - c_ctx->lun_bitmap = lun_bitmap; - - ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write); - if (ret) - return ret; - - pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0); - - rqd->ppa_status = (u64)0; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - - return ret; -} - static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, unsigned int secs_to_flush) { @@ -339,6 +397,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, l_mg->emeta_alloc_type, GFP_KERNEL); if (IS_ERR(bio)) { + pr_err("pblk: failed to map emeta io"); ret = PTR_ERR(bio); goto fail_free_rqd; } @@ -515,26 +574,54 @@ static int pblk_submit_write(struct pblk *pblk) unsigned int secs_avail, secs_to_sync, secs_to_com; unsigned int secs_to_flush; unsigned long pos; + unsigned int resubmit; - /* If there are no sectors in the cache, flushes (bios without data) - * will be cleared on the cache threads - */ - secs_avail = pblk_rb_read_count(&pblk->rwb); - if (!secs_avail) - return 1; - - secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); - if (!secs_to_flush && secs_avail < pblk->min_write_pgs) - return 1; - - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); - if (secs_to_sync > pblk->max_write_pgs) { - pr_err("pblk: bad buffer sync calculation\n"); - return 1; - } + spin_lock(&pblk->resubmit_lock); + resubmit = !list_empty(&pblk->resubmit_list); + spin_unlock(&pblk->resubmit_lock); + + /* Resubmit failed writes first */ + if (resubmit) { + struct pblk_c_ctx *r_ctx; + + spin_lock(&pblk->resubmit_lock); + r_ctx = list_first_entry(&pblk->resubmit_list, + struct pblk_c_ctx, list); + list_del(&r_ctx->list); + spin_unlock(&pblk->resubmit_lock); + + secs_avail = r_ctx->nr_valid; + pos = r_ctx->sentry; + + pblk_prepare_resubmit(pblk, pos, secs_avail); + secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, + secs_avail); - secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync; - pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); + kfree(r_ctx); + } else { + /* If there are no sectors in the cache, + * flushes (bios without data) will be cleared on + * the cache threads + */ + secs_avail = pblk_rb_read_count(&pblk->rwb); + if (!secs_avail) + return 1; + + secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); + if (!secs_to_flush && secs_avail < pblk->min_write_pgs) + return 1; + + secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, + secs_to_flush); + if (secs_to_sync > pblk->max_write_pgs) { + pr_err("pblk: bad buffer sync calculation\n"); + return 1; + } + + secs_to_com = (secs_to_sync > secs_avail) ? + secs_avail : secs_to_sync; + pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); + } bio = bio_alloc(GFP_KERNEL, secs_to_sync); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 9c682acfc5d1..34cc1d64a9d4 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -89,12 +89,14 @@ struct pblk_sec_meta { /* The number of GC lists and the rate-limiter states go together. This way the * rate-limiter can dictate how much GC is needed based on resource utilization. */ -#define PBLK_GC_NR_LISTS 3 +#define PBLK_GC_NR_LISTS 4 enum { - PBLK_RL_HIGH = 1, - PBLK_RL_MID = 2, - PBLK_RL_LOW = 3, + PBLK_RL_OFF = 0, + PBLK_RL_WERR = 1, + PBLK_RL_HIGH = 2, + PBLK_RL_MID = 3, + PBLK_RL_LOW = 4 }; #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) @@ -128,7 +130,6 @@ struct pblk_pad_rq { struct pblk_rec_ctx { struct pblk *pblk; struct nvm_rq *rqd; - struct list_head failed; struct work_struct ws_rec; }; @@ -279,6 +280,8 @@ struct pblk_rl { int rb_user_active; int rb_gc_active; + atomic_t werr_lines; /* Number of write error lines that needs gc */ + struct timer_list u_timer; unsigned long long nr_secs; @@ -312,6 +315,7 @@ enum { PBLK_LINEGC_MID = 23, PBLK_LINEGC_HIGH = 24, PBLK_LINEGC_FULL = 25, + PBLK_LINEGC_WERR = 26 }; #define PBLK_MAGIC 0x70626c6b /*pblk*/ @@ -413,6 +417,11 @@ struct pblk_smeta { struct line_smeta *buf; /* smeta buffer in persistent format */ }; +struct pblk_w_err_gc { + int has_write_err; + __le64 *lba_list; +}; + struct pblk_line { struct pblk *pblk; unsigned int id; /* Line number corresponds to the @@ -458,6 +467,8 @@ struct pblk_line { struct kref ref; /* Write buffer L2P references */ + struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */ + spinlock_t lock; /* Necessary for invalid_bitmap only */ }; @@ -489,6 +500,8 @@ struct pblk_line_mgmt { struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ struct list_head gc_low_list; /* Full lines ready to GC, low isc */ + struct list_head gc_werr_list; /* Write err recovery list */ + struct list_head gc_full_list; /* Full lines ready to GC, no valid */ struct list_head gc_empty_list; /* Full lines close, all valid */ @@ -664,12 +677,15 @@ struct pblk { struct list_head compl_list; - mempool_t *page_bio_pool; - mempool_t *gen_ws_pool; - mempool_t *rec_pool; - mempool_t *r_rq_pool; - mempool_t *w_rq_pool; - mempool_t *e_rq_pool; + spinlock_t resubmit_lock; /* Resubmit list lock */ + struct list_head resubmit_list; /* Resubmit list for failed writes*/ + + mempool_t page_bio_pool; + mempool_t gen_ws_pool; + mempool_t rec_pool; + mempool_t r_rq_pool; + mempool_t w_rq_pool; + mempool_t e_rq_pool; struct workqueue_struct *close_wq; struct workqueue_struct *bb_wq; @@ -713,9 +729,6 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb); unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, unsigned int pos, unsigned int nr_entries, unsigned int count); -unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, - struct list_head *list, - unsigned int max); int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, struct ppa_addr ppa, int bio_iter, bool advanced_bio); unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); @@ -766,11 +779,13 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk); struct pblk_line *pblk_line_get_erase(struct pblk *pblk); int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); int pblk_line_is_full(struct pblk_line *line); -void pblk_line_free(struct pblk *pblk, struct pblk_line *line); +void pblk_line_free(struct pblk_line *line); void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); void pblk_line_close(struct pblk *pblk, struct pblk_line *line); void pblk_line_close_ws(struct work_struct *work); void pblk_pipeline_stop(struct pblk *pblk); +void __pblk_pipeline_stop(struct pblk *pblk); +void __pblk_pipeline_flush(struct pblk *pblk); void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq); @@ -794,7 +809,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, unsigned long *lun_bitmap); -void pblk_end_io_sync(struct nvm_rq *rqd); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, @@ -837,23 +851,20 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, int pblk_write_ts(void *data); void pblk_write_timer_fn(struct timer_list *t); void pblk_write_should_kick(struct pblk *pblk); +void pblk_write_kick(struct pblk *pblk); /* * pblk read path */ -extern struct bio_set *pblk_bio_set; +extern struct bio_set pblk_bio_set; int pblk_submit_read(struct pblk *pblk, struct bio *bio); int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk recovery */ -void pblk_submit_rec(struct work_struct *work); struct pblk_line *pblk_recov_l2p(struct pblk *pblk); int pblk_recov_pad(struct pblk *pblk); int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); -int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, - struct pblk_rec_ctx *recovery, u64 *comp_bits, - unsigned int comp); /* * pblk gc @@ -864,7 +875,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, #define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */ int pblk_gc_init(struct pblk *pblk); -void pblk_gc_exit(struct pblk *pblk); +void pblk_gc_exit(struct pblk *pblk, bool graceful); void pblk_gc_should_start(struct pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); void pblk_gc_should_kick(struct pblk *pblk); @@ -894,6 +905,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, bool used); int pblk_rl_is_limit(struct pblk_rl *rl); +void pblk_rl_werr_line_in(struct pblk_rl *rl); +void pblk_rl_werr_line_out(struct pblk_rl *rl); + /* * pblk sysfs */ diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 433dbeddfcf9..6663893f41c4 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -191,10 +191,10 @@ static int init_pmu(void); static void pmu_start(void); static irqreturn_t via_pmu_interrupt(int irq, void *arg); static irqreturn_t gpio1_interrupt(int irq, void *arg); -static const struct file_operations pmu_info_proc_fops; -static const struct file_operations pmu_irqstats_proc_fops; +static int pmu_info_proc_show(struct seq_file *m, void *v); +static int pmu_irqstats_proc_show(struct seq_file *m, void *v); +static int pmu_battery_proc_show(struct seq_file *m, void *v); static void pmu_pass_intr(unsigned char *data, int len); -static const struct file_operations pmu_battery_proc_fops; static const struct file_operations pmu_options_proc_fops; #ifdef CONFIG_ADB @@ -511,13 +511,15 @@ static int __init via_pmu_dev_init(void) for (i=0; i<pmu_battery_count; i++) { char title[16]; sprintf(title, "battery_%ld", i); - proc_pmu_batt[i] = proc_create_data(title, 0, proc_pmu_root, - &pmu_battery_proc_fops, (void *)i); + proc_pmu_batt[i] = proc_create_single_data(title, 0, + proc_pmu_root, pmu_battery_proc_show, + (void *)i); } - proc_pmu_info = proc_create("info", 0, proc_pmu_root, &pmu_info_proc_fops); - proc_pmu_irqstats = proc_create("interrupts", 0, proc_pmu_root, - &pmu_irqstats_proc_fops); + proc_pmu_info = proc_create_single("info", 0, proc_pmu_root, + pmu_info_proc_show); + proc_pmu_irqstats = proc_create_single("interrupts", 0, + proc_pmu_root, pmu_irqstats_proc_show); proc_pmu_options = proc_create("options", 0600, proc_pmu_root, &pmu_options_proc_fops); } @@ -811,19 +813,6 @@ static int pmu_info_proc_show(struct seq_file *m, void *v) return 0; } -static int pmu_info_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmu_info_proc_show, NULL); -} - -static const struct file_operations pmu_info_proc_fops = { - .owner = THIS_MODULE, - .open = pmu_info_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int pmu_irqstats_proc_show(struct seq_file *m, void *v) { int i; @@ -848,19 +837,6 @@ static int pmu_irqstats_proc_show(struct seq_file *m, void *v) return 0; } -static int pmu_irqstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmu_irqstats_proc_show, NULL); -} - -static const struct file_operations pmu_irqstats_proc_fops = { - .owner = THIS_MODULE, - .open = pmu_irqstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int pmu_battery_proc_show(struct seq_file *m, void *v) { long batnum = (long)m->private; @@ -875,19 +851,6 @@ static int pmu_battery_proc_show(struct seq_file *m, void *v) return 0; } -static int pmu_battery_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmu_battery_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations pmu_battery_proc_fops = { - .owner = THIS_MODULE, - .open = pmu_battery_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int pmu_options_proc_show(struct seq_file *m, void *v) { #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 3a0cfb237af9..d6bf294f3907 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -269,7 +269,7 @@ struct bcache_device { atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; - struct bio_set *bio_split; + struct bio_set bio_split; unsigned data_csum:1; @@ -345,6 +345,7 @@ struct cached_dev { struct keybuf writeback_keys; + struct task_struct *status_update_thread; /* * Order the write-half of writeback operations strongly in dispatch * order. (Maintain LBA order; don't allow reads completing out of @@ -392,6 +393,7 @@ struct cached_dev { #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64 atomic_t io_errors; unsigned error_limit; + unsigned offline_seconds; char backing_dev_name[BDEVNAME_SIZE]; }; @@ -528,9 +530,9 @@ struct cache_set { struct closure sb_write; struct semaphore sb_write_mutex; - mempool_t *search; - mempool_t *bio_meta; - struct bio_set *bio_split; + mempool_t search; + mempool_t bio_meta; + struct bio_set bio_split; /* For the btree cache */ struct shrinker shrink; @@ -655,7 +657,7 @@ struct cache_set { * A btree node on disk could have too many bsets for an iterator to fit * on the stack - have to dynamically allocate them */ - mempool_t *fill_iter; + mempool_t fill_iter; struct bset_sort_state sort; @@ -956,8 +958,6 @@ void bch_prio_write(struct cache *); void bch_write_bdev_super(struct cached_dev *, struct closure *); extern struct workqueue_struct *bcache_wq; -extern const char * const bch_cache_modes[]; -extern const char * const bch_stop_on_failure_modes[]; extern struct mutex bch_register_lock; extern struct list_head bch_cache_sets; diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 579c696a5fe0..f3403b45bc28 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1118,8 +1118,7 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, void bch_bset_sort_state_free(struct bset_sort_state *state) { - if (state->pool) - mempool_destroy(state->pool); + mempool_exit(&state->pool); } int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order) @@ -1129,11 +1128,7 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order) state->page_order = page_order; state->crit_factor = int_sqrt(1 << page_order); - state->pool = mempool_create_page_pool(1, page_order); - if (!state->pool) - return -ENOMEM; - - return 0; + return mempool_init_page_pool(&state->pool, 1, page_order); } EXPORT_SYMBOL(bch_bset_sort_state_init); @@ -1191,7 +1186,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, BUG_ON(order > state->page_order); - outp = mempool_alloc(state->pool, GFP_NOIO); + outp = mempool_alloc(&state->pool, GFP_NOIO); out = page_address(outp); used_mempool = true; order = state->page_order; @@ -1220,7 +1215,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, } if (used_mempool) - mempool_free(virt_to_page(out), state->pool); + mempool_free(virt_to_page(out), &state->pool); else free_pages((unsigned long) out, order); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 0c24280f3b98..b867f2200495 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -347,7 +347,7 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, /* Sorting */ struct bset_sort_state { - mempool_t *pool; + mempool_t pool; unsigned page_order; unsigned crit_factor; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 17936b2dc7d6..2a0968c04e21 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -204,7 +204,7 @@ void bch_btree_node_read_done(struct btree *b) struct bset *i = btree_bset_first(b); struct btree_iter *iter; - iter = mempool_alloc(b->c->fill_iter, GFP_NOIO); + iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; @@ -271,7 +271,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->sb)); out: - mempool_free(iter, b->c->fill_iter); + mempool_free(iter, &b->c->fill_iter); return; err: set_btree_node_io_error(b); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 2ddf8515e6a5..9612873afee2 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -17,12 +17,12 @@ void bch_bbio_free(struct bio *bio, struct cache_set *c) { struct bbio *b = container_of(bio, struct bbio, bio); - mempool_free(b, c->bio_meta); + mempool_free(b, &c->bio_meta); } struct bio *bch_bbio_alloc(struct cache_set *c) { - struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO); + struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO); struct bio *bio = &b->bio; bio_init(bio, bio->bi_inline_vecs, bucket_pages(c)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 8e3e8655ed63..ae67f5fa8047 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -213,7 +213,7 @@ static void bch_data_insert_start(struct closure *cl) do { unsigned i; struct bkey *k; - struct bio_set *split = op->c->bio_split; + struct bio_set *split = &op->c->bio_split; /* 1 for the device pointer and 1 for the chksum */ if (bch_keylist_realloc(&op->insert_keys, @@ -548,7 +548,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) n = bio_next_split(bio, min_t(uint64_t, INT_MAX, KEY_OFFSET(k) - bio->bi_iter.bi_sector), - GFP_NOIO, s->d->bio_split); + GFP_NOIO, &s->d->bio_split); bio_key = &container_of(n, struct bbio, bio)->key; bch_bkey_copy_single_ptr(bio_key, k, ptr); @@ -707,7 +707,7 @@ static void search_free(struct closure *cl) bio_complete(s); closure_debug_destroy(cl); - mempool_free(s, s->d->c->search); + mempool_free(s, &s->d->c->search); } static inline struct search *search_alloc(struct bio *bio, @@ -715,7 +715,7 @@ static inline struct search *search_alloc(struct bio *bio, { struct search *s; - s = mempool_alloc(d->c->search, GFP_NOIO); + s = mempool_alloc(&d->c->search, GFP_NOIO); closure_init(&s->cl, NULL); do_bio_hook(s, bio, request_endio); @@ -864,7 +864,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->cache_missed = 1; if (s->cache_miss || s->iop.bypass) { - miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); + miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split); ret = miss == bio ? MAP_DONE : MAP_CONTINUE; goto out_submit; } @@ -887,14 +887,14 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->iop.replace = true; - miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); + miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split); /* btree_search_recurse()'s btree iterator is no good anymore */ ret = miss == bio ? MAP_DONE : -EINTR; cache_bio = bio_alloc_bioset(GFP_NOWAIT, DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS), - dc->disk.bio_split); + &dc->disk.bio_split); if (!cache_bio) goto out_submit; @@ -1008,7 +1008,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) struct bio *flush; flush = bio_alloc_bioset(GFP_NOIO, 0, - dc->disk.bio_split); + &dc->disk.bio_split); if (!flush) { s->iop.status = BLK_STS_RESOURCE; goto insert_data; @@ -1021,7 +1021,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) closure_bio_submit(s->iop.c, flush, cl); } } else { - s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); + s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split); /* I/O request sent to backing device */ bio->bi_end_io = backing_request_endio; closure_bio_submit(s->iop.c, bio, cl); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3dea06b41d43..a31e55bcc4e5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -37,24 +37,6 @@ static const char invalid_uuid[] = { 0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99 }; -/* Default is -1; we skip past it for struct cached_dev's cache mode */ -const char * const bch_cache_modes[] = { - "default", - "writethrough", - "writeback", - "writearound", - "none", - NULL -}; - -/* Default is -1; we skip past it for stop_when_cache_set_failed */ -const char * const bch_stop_on_failure_modes[] = { - "default", - "auto", - "always", - NULL -}; - static struct kobject *bcache_kobj; struct mutex bch_register_lock; LIST_HEAD(bch_cache_sets); @@ -654,6 +636,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode, unsigned int cmd, unsigned long arg) { struct bcache_device *d = b->bd_disk->private_data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + + if (dc->io_disable) + return -EIO; + return d->ioctl(d, mode, cmd, arg); } @@ -766,8 +753,7 @@ static void bcache_device_free(struct bcache_device *d) put_disk(d->disk); } - if (d->bio_split) - bioset_free(d->bio_split); + bioset_exit(&d->bio_split); kvfree(d->full_dirty_stripes); kvfree(d->stripe_sectors_dirty); @@ -809,9 +795,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, if (idx < 0) return idx; - if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio), - BIOSET_NEED_BVECS | - BIOSET_NEED_RESCUER)) || + if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), + BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) || !(d->disk = alloc_disk(BCACHE_MINORS))) { ida_simple_remove(&bcache_device_idx, idx); return -ENOMEM; @@ -864,6 +849,44 @@ static void calc_cached_dev_sectors(struct cache_set *c) c->cached_dev_sectors = sectors; } +#define BACKING_DEV_OFFLINE_TIMEOUT 5 +static int cached_dev_status_update(void *arg) +{ + struct cached_dev *dc = arg; + struct request_queue *q; + + /* + * If this delayed worker is stopping outside, directly quit here. + * dc->io_disable might be set via sysfs interface, so check it + * here too. + */ + while (!kthread_should_stop() && !dc->io_disable) { + q = bdev_get_queue(dc->bdev); + if (blk_queue_dying(q)) + dc->offline_seconds++; + else + dc->offline_seconds = 0; + + if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { + pr_err("%s: device offline for %d seconds", + dc->backing_dev_name, + BACKING_DEV_OFFLINE_TIMEOUT); + pr_err("%s: disable I/O request due to backing " + "device offline", dc->disk.name); + dc->io_disable = true; + /* let others know earlier that io_disable is true */ + smp_mb(); + bcache_device_stop(&dc->disk); + break; + } + schedule_timeout_interruptible(HZ); + } + + wait_for_kthread_stop(); + return 0; +} + + void bch_cached_dev_run(struct cached_dev *dc) { struct bcache_device *d = &dc->disk; @@ -906,6 +929,14 @@ void bch_cached_dev_run(struct cached_dev *dc) if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) pr_debug("error creating sysfs link"); + + dc->status_update_thread = kthread_run(cached_dev_status_update, + dc, "bcache_status_update"); + if (IS_ERR(dc->status_update_thread)) { + pr_warn("failed to create bcache_status_update kthread, " + "continue to run without monitoring backing " + "device status"); + } } /* @@ -1139,6 +1170,8 @@ static void cached_dev_free(struct closure *cl) kthread_stop(dc->writeback_thread); if (dc->writeback_write_wq) destroy_workqueue(dc->writeback_write_wq); + if (!IS_ERR_OR_NULL(dc->status_update_thread)) + kthread_stop(dc->status_update_thread); if (atomic_read(&dc->running)) bd_unlink_disk_holder(dc->bdev, dc->disk.disk); @@ -1465,14 +1498,10 @@ static void cache_set_free(struct closure *cl) if (c->moving_gc_wq) destroy_workqueue(c->moving_gc_wq); - if (c->bio_split) - bioset_free(c->bio_split); - if (c->fill_iter) - mempool_destroy(c->fill_iter); - if (c->bio_meta) - mempool_destroy(c->bio_meta); - if (c->search) - mempool_destroy(c->search); + bioset_exit(&c->bio_split); + mempool_exit(&c->fill_iter); + mempool_exit(&c->bio_meta); + mempool_exit(&c->search); kfree(c->devices); mutex_lock(&bch_register_lock); @@ -1683,21 +1712,17 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - c->search = mempool_create_slab_pool(32, bch_search_cache); - if (!c->search) - goto err; - iter_size = (sb->bucket_size / sb->block_size + 1) * sizeof(struct btree_iter_set); if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) || - !(c->bio_meta = mempool_create_kmalloc_pool(2, - sizeof(struct bbio) + sizeof(struct bio_vec) * - bucket_pages(c))) || - !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || - !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio), - BIOSET_NEED_BVECS | - BIOSET_NEED_RESCUER)) || + mempool_init_slab_pool(&c->search, 32, bch_search_cache) || + mempool_init_kmalloc_pool(&c->bio_meta, 2, + sizeof(struct bbio) + sizeof(struct bio_vec) * + bucket_pages(c)) || + mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || + bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio), + BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0)) || diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index dfeef583ee50..8ccbc8f3b3af 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -16,6 +16,22 @@ #include <linux/sort.h> #include <linux/sched/clock.h> +/* Default is -1; we skip past it for struct cached_dev's cache mode */ +static const char * const bch_cache_modes[] = { + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +/* Default is -1; we skip past it for stop_when_cache_set_failed */ +static const char * const bch_stop_on_failure_modes[] = { + "auto", + "always", + NULL +}; + static const char * const cache_replacement_policies[] = { "lru", "fifo", @@ -114,6 +130,20 @@ rw_attribute(btree_shrinker_disabled); rw_attribute(copy_gc_enabled); rw_attribute(size); +static ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], + size_t selected) +{ + char *out = buf; + size_t i; + + for (i = 0; list[i]; i++) + out += snprintf(out, buf + size - out, + i == selected ? "[%s] " : "%s ", list[i]); + + out[-1] = '\n'; + return out - buf; +} + SHOW(__bch_cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, @@ -124,12 +154,12 @@ SHOW(__bch_cached_dev) if (attr == &sysfs_cache_mode) return bch_snprint_string_list(buf, PAGE_SIZE, - bch_cache_modes + 1, + bch_cache_modes, BDEV_CACHE_MODE(&dc->sb)); if (attr == &sysfs_stop_when_cache_set_failed) return bch_snprint_string_list(buf, PAGE_SIZE, - bch_stop_on_failure_modes + 1, + bch_stop_on_failure_modes, dc->stop_when_cache_set_failed); @@ -253,8 +283,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - v = bch_read_string_list(buf, bch_cache_modes + 1); - + v = __sysfs_match_string(bch_cache_modes, -1, buf); if (v < 0) return v; @@ -265,8 +294,7 @@ STORE(__cached_dev) } if (attr == &sysfs_stop_when_cache_set_failed) { - v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1); - + v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); if (v < 0) return v; @@ -635,6 +663,7 @@ SHOW_LOCKED(bch_cache_set) STORE(__bch_cache_set) { struct cache_set *c = container_of(kobj, struct cache_set, kobj); + ssize_t v; if (attr == &sysfs_unregister) bch_cache_set_unregister(c); @@ -698,8 +727,7 @@ STORE(__bch_cache_set) c->congested_write_threshold_us); if (attr == &sysfs_errors) { - ssize_t v = bch_read_string_list(buf, error_actions); - + v = __sysfs_match_string(error_actions, -1, buf); if (v < 0) return v; @@ -714,8 +742,7 @@ STORE(__bch_cache_set) c->error_decay = strtoul_or_return(buf) / 88; if (attr == &sysfs_io_disable) { - int v = strtoul_or_return(buf); - + v = strtoul_or_return(buf); if (v) { if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) @@ -929,6 +956,7 @@ SHOW_LOCKED(bch_cache) STORE(__bch_cache) { struct cache *ca = container_of(kobj, struct cache, kobj); + ssize_t v; if (attr == &sysfs_discard) { bool v = strtoul_or_return(buf); @@ -943,8 +971,7 @@ STORE(__bch_cache) } if (attr == &sysfs_cache_replacement_policy) { - ssize_t v = bch_read_string_list(buf, cache_replacement_policies); - + v = __sysfs_match_string(cache_replacement_policies, -1, buf); if (v < 0) return v; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 74febd5230df..fc479b026d6d 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -120,41 +120,6 @@ ssize_t bch_hprint(char *buf, int64_t v) return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } -ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], - size_t selected) -{ - char *out = buf; - size_t i; - - for (i = 0; list[i]; i++) - out += snprintf(out, buf + size - out, - i == selected ? "[%s] " : "%s ", list[i]); - - out[-1] = '\n'; - return out - buf; -} - -ssize_t bch_read_string_list(const char *buf, const char * const list[]) -{ - size_t i; - char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL); - if (!d) - return -ENOMEM; - - s = strim(d); - - for (i = 0; list[i]; i++) - if (!strcmp(list[i], s)) - break; - - kfree(d); - - if (!list[i]) - return -EINVAL; - - return i; -} - bool bch_is_zero(const char *p, size_t n) { size_t i; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 268024529edd..cced87f8eb27 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -365,11 +365,6 @@ ssize_t bch_hprint(char *buf, int64_t v); bool bch_is_zero(const char *p, size_t n); int bch_parse_uuid(const char *s, char *uuid); -ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], - size_t selected); - -ssize_t bch_read_string_list(const char *buf, const char * const list[]); - struct time_stats { spinlock_t lock; /* diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c index 874841f0fc83..8e33a3808368 100644 --- a/drivers/md/dm-bio-prison-v1.c +++ b/drivers/md/dm-bio-prison-v1.c @@ -19,7 +19,7 @@ struct dm_bio_prison { spinlock_t lock; - mempool_t *cell_pool; + mempool_t cell_pool; struct rb_root cells; }; @@ -34,14 +34,15 @@ static struct kmem_cache *_cell_cache; struct dm_bio_prison *dm_bio_prison_create(void) { struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + int ret; if (!prison) return NULL; spin_lock_init(&prison->lock); - prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); - if (!prison->cell_pool) { + ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache); + if (ret) { kfree(prison); return NULL; } @@ -54,21 +55,21 @@ EXPORT_SYMBOL_GPL(dm_bio_prison_create); void dm_bio_prison_destroy(struct dm_bio_prison *prison) { - mempool_destroy(prison->cell_pool); + mempool_exit(&prison->cell_pool); kfree(prison); } EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp) { - return mempool_alloc(prison->cell_pool, gfp); + return mempool_alloc(&prison->cell_pool, gfp); } EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell); void dm_bio_prison_free_cell(struct dm_bio_prison *prison, struct dm_bio_prison_cell *cell) { - mempool_free(cell, prison->cell_pool); + mempool_free(cell, &prison->cell_pool); } EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c index 8ce3a1a588cf..601b1569206a 100644 --- a/drivers/md/dm-bio-prison-v2.c +++ b/drivers/md/dm-bio-prison-v2.c @@ -21,7 +21,7 @@ struct dm_bio_prison_v2 { struct workqueue_struct *wq; spinlock_t lock; - mempool_t *cell_pool; + mempool_t cell_pool; struct rb_root cells; }; @@ -36,6 +36,7 @@ static struct kmem_cache *_cell_cache; struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) { struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + int ret; if (!prison) return NULL; @@ -43,8 +44,8 @@ struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) prison->wq = wq; spin_lock_init(&prison->lock); - prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); - if (!prison->cell_pool) { + ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache); + if (ret) { kfree(prison); return NULL; } @@ -57,21 +58,21 @@ EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2); void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison) { - mempool_destroy(prison->cell_pool); + mempool_exit(&prison->cell_pool); kfree(prison); } EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2); struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp) { - return mempool_alloc(prison->cell_pool, gfp); + return mempool_alloc(&prison->cell_pool, gfp); } EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2); void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison, struct dm_bio_prison_cell_v2 *cell) { - mempool_free(cell, prison->cell_pool); + mempool_free(cell, &prison->cell_pool); } EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index da208638fba4..001c71248246 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -447,9 +447,9 @@ struct cache { struct work_struct migration_worker; struct delayed_work waker; struct dm_bio_prison_v2 *prison; - struct bio_set *bs; + struct bio_set bs; - mempool_t *migration_pool; + mempool_t migration_pool; struct dm_cache_policy *policy; unsigned policy_nr_args; @@ -550,7 +550,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) { struct dm_cache_migration *mg; - mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT); if (!mg) return NULL; @@ -569,7 +569,7 @@ static void free_migration(struct dm_cache_migration *mg) if (atomic_dec_and_test(&cache->nr_allocated_migrations)) wake_up(&cache->migration_wait); - mempool_free(mg, cache->migration_pool); + mempool_free(mg, &cache->migration_pool); } /*----------------------------------------------------------------*/ @@ -924,7 +924,7 @@ static void issue_op(struct bio *bio, void *context) static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio, dm_oblock_t oblock, dm_cblock_t cblock) { - struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs); + struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs); BUG_ON(!origin_bio); @@ -2011,7 +2011,7 @@ static void destroy(struct cache *cache) { unsigned i; - mempool_destroy(cache->migration_pool); + mempool_exit(&cache->migration_pool); if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); @@ -2047,8 +2047,7 @@ static void destroy(struct cache *cache) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); - if (cache->bs) - bioset_free(cache->bs); + bioset_exit(&cache->bs); kfree(cache); } @@ -2498,8 +2497,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->features = ca->features; if (writethrough_mode(cache)) { /* Create bioset for writethrough bios issued to origin */ - cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!cache->bs) + r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0); + if (r) goto bad; } @@ -2630,9 +2629,9 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } - cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, - migration_cache); - if (!cache->migration_pool) { + r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE, + migration_cache); + if (r) { *error = "Error creating cache's migration mempool"; goto bad; } diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 3222e21cbbf8..f21c5d21bf1b 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -91,8 +91,8 @@ struct mapped_device { /* * io objects are allocated from here. */ - struct bio_set *io_bs; - struct bio_set *bs; + struct bio_set io_bs; + struct bio_set bs; /* * freeze/thaw support require holding onto a super block diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 44ff473dab3e..da02f4d8e4b9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -143,14 +143,14 @@ struct crypt_config { * pool for per bio private data, crypto requests, * encryption requeusts/buffer pages and integrity tags */ - mempool_t *req_pool; - mempool_t *page_pool; - mempool_t *tag_pool; + mempool_t req_pool; + mempool_t page_pool; + mempool_t tag_pool; unsigned tag_pool_max_sectors; struct percpu_counter n_allocated_pages; - struct bio_set *bs; + struct bio_set bs; struct mutex bio_alloc_lock; struct workqueue_struct *io_queue; @@ -1245,7 +1245,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); if (!ctx->r.req) - ctx->r.req = mempool_alloc(cc->req_pool, GFP_NOIO); + ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO); skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); @@ -1262,7 +1262,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, struct convert_context *ctx) { if (!ctx->r.req_aead) - ctx->r.req_aead = mempool_alloc(cc->req_pool, GFP_NOIO); + ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO); aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); @@ -1290,7 +1290,7 @@ static void crypt_free_req_skcipher(struct crypt_config *cc, struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); if ((struct skcipher_request *)(io + 1) != req) - mempool_free(req, cc->req_pool); + mempool_free(req, &cc->req_pool); } static void crypt_free_req_aead(struct crypt_config *cc, @@ -1299,7 +1299,7 @@ static void crypt_free_req_aead(struct crypt_config *cc, struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); if ((struct aead_request *)(io + 1) != req) - mempool_free(req, cc->req_pool); + mempool_free(req, &cc->req_pool); } static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_bio) @@ -1409,7 +1409,7 @@ retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) mutex_lock(&cc->bio_alloc_lock); - clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); + clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs); if (!clone) goto out; @@ -1418,7 +1418,7 @@ retry: remaining_size = size; for (i = 0; i < nr_iovecs; i++) { - page = mempool_alloc(cc->page_pool, gfp_mask); + page = mempool_alloc(&cc->page_pool, gfp_mask); if (!page) { crypt_free_buffer_pages(cc, clone); bio_put(clone); @@ -1453,7 +1453,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) bio_for_each_segment_all(bv, clone, i) { BUG_ON(!bv->bv_page); - mempool_free(bv->bv_page, cc->page_pool); + mempool_free(bv->bv_page, &cc->page_pool); } } @@ -1492,7 +1492,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) crypt_free_req(cc, io->ctx.r.req, base_bio); if (unlikely(io->integrity_metadata_from_pool)) - mempool_free(io->integrity_metadata, io->cc->tag_pool); + mempool_free(io->integrity_metadata, &io->cc->tag_pool); else kfree(io->integrity_metadata); @@ -1565,7 +1565,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) * biovecs we don't need to worry about the block layer * modifying the biovec array; so leverage bio_clone_fast(). */ - clone = bio_clone_fast(io->base_bio, gfp, cc->bs); + clone = bio_clone_fast(io->base_bio, gfp, &cc->bs); if (!clone) return 1; @@ -2219,15 +2219,13 @@ static void crypt_dtr(struct dm_target *ti) crypt_free_tfms(cc); - if (cc->bs) - bioset_free(cc->bs); + bioset_exit(&cc->bs); - mempool_destroy(cc->page_pool); - mempool_destroy(cc->req_pool); - mempool_destroy(cc->tag_pool); + mempool_exit(&cc->page_pool); + mempool_exit(&cc->req_pool); + mempool_exit(&cc->tag_pool); - if (cc->page_pool) - WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0); + WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0); percpu_counter_destroy(&cc->n_allocated_pages); if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) @@ -2743,8 +2741,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) iv_size_padding = align_mask; } - ret = -ENOMEM; - /* ...| IV + padding | original IV | original sec. number | bio tag offset | */ additional_req_size = sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size + @@ -2752,8 +2748,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) sizeof(uint64_t) + sizeof(unsigned int); - cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + additional_req_size); - if (!cc->req_pool) { + ret = mempool_init_kmalloc_pool(&cc->req_pool, MIN_IOS, cc->dmreq_start + additional_req_size); + if (ret) { ti->error = "Cannot allocate crypt request mempool"; goto bad; } @@ -2762,14 +2758,14 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size, ARCH_KMALLOC_MINALIGN); - cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc); - if (!cc->page_pool) { + ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc); + if (ret) { ti->error = "Cannot allocate page mempool"; goto bad; } - cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS); - if (!cc->bs) { + ret = bioset_init(&cc->bs, MIN_IOS, 0, BIOSET_NEED_BVECS); + if (ret) { ti->error = "Cannot allocate crypt bioset"; goto bad; } @@ -2806,11 +2802,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!cc->tag_pool_max_sectors) cc->tag_pool_max_sectors = 1; - cc->tag_pool = mempool_create_kmalloc_pool(MIN_IOS, + ret = mempool_init_kmalloc_pool(&cc->tag_pool, MIN_IOS, cc->tag_pool_max_sectors * cc->on_disk_tag_size); - if (!cc->tag_pool) { + if (ret) { ti->error = "Cannot allocate integrity tags mempool"; - ret = -ENOMEM; goto bad; } @@ -2903,7 +2898,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)))) { if (bio_sectors(bio) > cc->tag_pool_max_sectors) dm_accept_partial_bio(bio, cc->tag_pool_max_sectors); - io->integrity_metadata = mempool_alloc(cc->tag_pool, GFP_NOIO); + io->integrity_metadata = mempool_alloc(&cc->tag_pool, GFP_NOIO); io->integrity_metadata_from_pool = true; } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 514fb4aec5d1..fc68c7aaef8e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -142,7 +142,7 @@ struct dm_integrity_c { unsigned tag_size; __s8 log2_tag_size; sector_t start; - mempool_t *journal_io_mempool; + mempool_t journal_io_mempool; struct dm_io_client *io; struct dm_bufio_client *bufio; struct workqueue_struct *metadata_wq; @@ -1817,7 +1817,7 @@ static void complete_copy_from_journal(unsigned long error, void *context) struct journal_completion *comp = io->comp; struct dm_integrity_c *ic = comp->ic; remove_range(ic, &io->range); - mempool_free(io, ic->journal_io_mempool); + mempool_free(io, &ic->journal_io_mempool); if (unlikely(error != 0)) dm_integrity_io_error(ic, "copying from journal", -EIO); complete_journal_op(comp); @@ -1886,7 +1886,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, } next_loop = k - 1; - io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO); + io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO); io->comp = ∁ io->range.logical_sector = sec; io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; @@ -1918,7 +1918,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, if (j == k) { remove_range_unlocked(ic, &io->range); spin_unlock_irq(&ic->endio_wait.lock); - mempool_free(io, ic->journal_io_mempool); + mempool_free(io, &ic->journal_io_mempool); goto skip_io; } for (l = j; l < k; l++) { @@ -2980,9 +2980,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - ic->journal_io_mempool = mempool_create_slab_pool(JOURNAL_IO_MEMPOOL, journal_io_cache); - if (!ic->journal_io_mempool) { - r = -ENOMEM; + r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache); + if (r) { ti->error = "Cannot allocate mempool"; goto bad; } @@ -3196,7 +3195,7 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->writer_wq); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); - mempool_destroy(ic->journal_io_mempool); + mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); if (ic->dev) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index a8d914d5abbe..53c6ed0eaa1f 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -22,8 +22,8 @@ #define DM_IO_MAX_REGIONS BITS_PER_LONG struct dm_io_client { - mempool_t *pool; - struct bio_set *bios; + mempool_t pool; + struct bio_set bios; }; /* @@ -49,32 +49,33 @@ struct dm_io_client *dm_io_client_create(void) { struct dm_io_client *client; unsigned min_ios = dm_get_reserved_bio_based_ios(); + int ret; client = kmalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); - client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache); - if (!client->pool) + ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache); + if (ret) goto bad; - client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS); - if (!client->bios) + ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS); + if (ret) goto bad; return client; bad: - mempool_destroy(client->pool); + mempool_exit(&client->pool); kfree(client); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } EXPORT_SYMBOL(dm_io_client_create); void dm_io_client_destroy(struct dm_io_client *client) { - mempool_destroy(client->pool); - bioset_free(client->bios); + mempool_exit(&client->pool); + bioset_exit(&client->bios); kfree(client); } EXPORT_SYMBOL(dm_io_client_destroy); @@ -120,7 +121,7 @@ static void complete_io(struct io *io) invalidate_kernel_vmap_range(io->vma_invalidate_address, io->vma_invalidate_size); - mempool_free(io, io->client->pool); + mempool_free(io, &io->client->pool); fn(error_bits, context); } @@ -344,7 +345,7 @@ static void do_region(int op, int op_flags, unsigned region, dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); } - bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); + bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio_set_dev(bio, where->bdev); bio->bi_end_io = endio; @@ -442,7 +443,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, init_completion(&sio.wait); - io = mempool_alloc(client->pool, GFP_NOIO); + io = mempool_alloc(&client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->client = client; @@ -474,7 +475,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, return -EIO; } - io = mempool_alloc(client->pool, GFP_NOIO); + io = mempool_alloc(&client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->client = client; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index e6e7c686646d..c89a675a2aac 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -47,7 +47,7 @@ struct dm_kcopyd_client { wait_queue_head_t destroyq; atomic_t nr_jobs; - mempool_t *job_pool; + mempool_t job_pool; struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; @@ -479,7 +479,7 @@ static int run_complete_job(struct kcopyd_job *job) */ if (job->master_job == job) { mutex_destroy(&job->lock); - mempool_free(job, kc->job_pool); + mempool_free(job, &kc->job_pool); } fn(read_err, write_err, context); @@ -751,7 +751,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, * Allocate an array of jobs consisting of one master job * followed by SPLIT_COUNT sub jobs. */ - job = mempool_alloc(kc->job_pool, GFP_NOIO); + job = mempool_alloc(&kc->job_pool, GFP_NOIO); mutex_init(&job->lock); /* @@ -835,7 +835,7 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, { struct kcopyd_job *job; - job = mempool_alloc(kc->job_pool, GFP_NOIO); + job = mempool_alloc(&kc->job_pool, GFP_NOIO); memset(job, 0, sizeof(struct kcopyd_job)); job->kc = kc; @@ -879,7 +879,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) *---------------------------------------------------------------*/ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) { - int r = -ENOMEM; + int r; struct dm_kcopyd_client *kc; kc = kmalloc(sizeof(*kc), GFP_KERNEL); @@ -892,14 +892,16 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro INIT_LIST_HEAD(&kc->pages_jobs); kc->throttle = throttle; - kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); - if (!kc->job_pool) + r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache); + if (r) goto bad_slab; INIT_WORK(&kc->kcopyd_work, do_work); kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0); - if (!kc->kcopyd_wq) + if (!kc->kcopyd_wq) { + r = -ENOMEM; goto bad_workqueue; + } kc->pages = NULL; kc->nr_reserved_pages = kc->nr_free_pages = 0; @@ -923,7 +925,7 @@ bad_io_client: bad_client_pages: destroy_workqueue(kc->kcopyd_wq); bad_workqueue: - mempool_destroy(kc->job_pool); + mempool_exit(&kc->job_pool); bad_slab: kfree(kc); @@ -942,7 +944,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) destroy_workqueue(kc->kcopyd_wq); dm_io_client_destroy(kc->io_client); client_free_pages(kc); - mempool_destroy(kc->job_pool); + mempool_exit(&kc->job_pool); kfree(kc); } EXPORT_SYMBOL(dm_kcopyd_client_destroy); diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 53b7b06d0aa8..52090bee17c2 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -76,7 +76,7 @@ struct log_c { */ uint32_t integrated_flush; - mempool_t *flush_entry_pool; + mempool_t flush_entry_pool; }; static struct kmem_cache *_flush_entry_cache; @@ -249,11 +249,10 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, goto out; } - lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE, - _flush_entry_cache); - if (!lc->flush_entry_pool) { + r = mempool_init_slab_pool(&lc->flush_entry_pool, FLUSH_ENTRY_POOL_SIZE, + _flush_entry_cache); + if (r) { DMERR("Failed to create flush_entry_pool"); - r = -ENOMEM; goto out; } @@ -313,7 +312,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, out: kfree(devices_rdata); if (r) { - mempool_destroy(lc->flush_entry_pool); + mempool_exit(&lc->flush_entry_pool); kfree(lc); kfree(ctr_str); } else { @@ -342,7 +341,7 @@ static void userspace_dtr(struct dm_dirty_log *log) if (lc->log_dev) dm_put_device(lc->ti, lc->log_dev); - mempool_destroy(lc->flush_entry_pool); + mempool_exit(&lc->flush_entry_pool); kfree(lc->usr_argv_str); kfree(lc); @@ -570,7 +569,7 @@ static int userspace_flush(struct dm_dirty_log *log) int mark_list_is_empty; int clear_list_is_empty; struct dm_dirty_log_flush_entry *fe, *tmp_fe; - mempool_t *flush_entry_pool = lc->flush_entry_pool; + mempool_t *flush_entry_pool = &lc->flush_entry_pool; spin_lock_irqsave(&lc->flush_lock, flags); list_splice_init(&lc->mark_list, &mark_list); @@ -653,7 +652,7 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region) struct dm_dirty_log_flush_entry *fe; /* Wait for an allocation, but _never_ fail */ - fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO); + fe = mempool_alloc(&lc->flush_entry_pool, GFP_NOIO); BUG_ON(!fe); spin_lock_irqsave(&lc->flush_lock, flags); @@ -687,7 +686,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region) * to cause the region to be resync'ed when the * device is activated next time. */ - fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC); + fe = mempool_alloc(&lc->flush_entry_pool, GFP_ATOMIC); if (!fe) { DMERR("Failed to allocate memory to clear region."); return; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 203a0419d2b0..d94ba6f72ff5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -520,7 +520,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, bdev = pgpath->path.dev->bdev; q = bdev_get_queue(bdev); - clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC); + clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, + BLK_MQ_REQ_NOWAIT); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ if (blk_queue_dying(q)) { diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 85c32b22a420..43149eb49375 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -63,7 +63,7 @@ struct dm_region_hash { /* hash table */ rwlock_t hash_lock; - mempool_t *region_pool; + mempool_t region_pool; unsigned mask; unsigned nr_buckets; unsigned prime; @@ -169,6 +169,7 @@ struct dm_region_hash *dm_region_hash_create( struct dm_region_hash *rh; unsigned nr_buckets, max_buckets; size_t i; + int ret; /* * Calculate a suitable number of buckets for our hash @@ -220,9 +221,9 @@ struct dm_region_hash *dm_region_hash_create( INIT_LIST_HEAD(&rh->failed_recovered_regions); rh->flush_failure = 0; - rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, - sizeof(struct dm_region)); - if (!rh->region_pool) { + ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS, + sizeof(struct dm_region)); + if (ret) { vfree(rh->buckets); kfree(rh); rh = ERR_PTR(-ENOMEM); @@ -242,14 +243,14 @@ void dm_region_hash_destroy(struct dm_region_hash *rh) list_for_each_entry_safe(reg, nreg, rh->buckets + h, hash_list) { BUG_ON(atomic_read(®->pending)); - mempool_free(reg, rh->region_pool); + mempool_free(reg, &rh->region_pool); } } if (rh->log) dm_dirty_log_destroy(rh->log); - mempool_destroy(rh->region_pool); + mempool_exit(&rh->region_pool); vfree(rh->buckets); kfree(rh); } @@ -287,7 +288,7 @@ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region) { struct dm_region *reg, *nreg; - nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); + nreg = mempool_alloc(&rh->region_pool, GFP_ATOMIC); if (unlikely(!nreg)) nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL); @@ -303,7 +304,7 @@ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region) reg = __rh_lookup(rh, region); if (reg) /* We lost the race. */ - mempool_free(nreg, rh->region_pool); + mempool_free(nreg, &rh->region_pool); else { __rh_insert(rh, nreg); if (nreg->state == DM_RH_CLEAN) { @@ -481,17 +482,17 @@ void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled) list_for_each_entry_safe(reg, next, &recovered, list) { rh->log->type->clear_region(rh->log, reg->key); complete_resync_work(reg, 1); - mempool_free(reg, rh->region_pool); + mempool_free(reg, &rh->region_pool); } list_for_each_entry_safe(reg, next, &failed_recovered, list) { complete_resync_work(reg, errors_handled ? 0 : 1); - mempool_free(reg, rh->region_pool); + mempool_free(reg, &rh->region_pool); } list_for_each_entry_safe(reg, next, &clean, list) { rh->log->type->clear_region(rh->log, reg->key); - mempool_free(reg, rh->region_pool); + mempool_free(reg, &rh->region_pool); } rh->log->type->flush(rh->log); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index bf0b840645cc..6e547b8dd298 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -406,7 +406,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ if (blk_queue_io_stat(clone->q)) clone->rq_flags |= RQF_IO_STAT; - clone->start_time = jiffies; + clone->start_time_ns = ktime_get_ns(); r = blk_insert_cloned_request(clone->q, clone); if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE) /* must complete clone in terms of original request */ @@ -433,7 +433,7 @@ static int setup_clone(struct request *clone, struct request *rq, { int r; - r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, + r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask, dm_rq_bio_constructor, tio); if (r) return r; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 216035be5661..b11ddc55f297 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -87,7 +87,7 @@ struct dm_snapshot { */ struct list_head out_of_order_list; - mempool_t *pending_pool; + mempool_t pending_pool; struct dm_exception_table pending; struct dm_exception_table complete; @@ -682,7 +682,7 @@ static void free_completed_exception(struct dm_exception *e) static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) { - struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, + struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool, GFP_NOIO); atomic_inc(&s->pending_exceptions_count); @@ -695,7 +695,7 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; - mempool_free(pe, s->pending_pool); + mempool_free(pe, &s->pending_pool); smp_mb__before_atomic(); atomic_dec(&s->pending_exceptions_count); } @@ -1196,10 +1196,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_kcopyd; } - s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); - if (!s->pending_pool) { + r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache); + if (r) { ti->error = "Could not allocate mempool for pending exceptions"; - r = -ENOMEM; goto bad_pending_pool; } @@ -1259,7 +1258,7 @@ bad_read_metadata: unregister_snapshot(s); bad_load_and_register: - mempool_destroy(s->pending_pool); + mempool_exit(&s->pending_pool); bad_pending_pool: dm_kcopyd_client_destroy(s->kcopyd_client); @@ -1355,7 +1354,7 @@ static void snapshot_dtr(struct dm_target *ti) while (atomic_read(&s->pending_exceptions_count)) msleep(1); /* - * Ensure instructions in mempool_destroy aren't reordered + * Ensure instructions in mempool_exit aren't reordered * before atomic_read. */ smp_mb(); @@ -1367,7 +1366,7 @@ static void snapshot_dtr(struct dm_target *ti) __free_exceptions(s); - mempool_destroy(s->pending_pool); + mempool_exit(&s->pending_pool); dm_exception_store_destroy(s->store); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index b11107497d2e..6c923824ec91 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -260,7 +260,7 @@ struct pool { struct dm_deferred_set *all_io_ds; struct dm_thin_new_mapping *next_mapping; - mempool_t *mapping_pool; + mempool_t mapping_pool; process_bio_fn process_bio; process_bio_fn process_discard; @@ -917,7 +917,7 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { cell_error(m->tc->pool, m->cell); list_del(&m->list); - mempool_free(m, m->tc->pool->mapping_pool); + mempool_free(m, &m->tc->pool->mapping_pool); } static void process_prepared_mapping(struct dm_thin_new_mapping *m) @@ -961,7 +961,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) out: list_del(&m->list); - mempool_free(m, pool->mapping_pool); + mempool_free(m, &pool->mapping_pool); } /*----------------------------------------------------------------*/ @@ -971,7 +971,7 @@ static void free_discard_mapping(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; if (m->cell) cell_defer_no_holder(tc, m->cell); - mempool_free(m, tc->pool->mapping_pool); + mempool_free(m, &tc->pool->mapping_pool); } static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) @@ -999,7 +999,7 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m) bio_endio(m->bio); cell_defer_no_holder(tc, m->cell); - mempool_free(m, tc->pool->mapping_pool); + mempool_free(m, &tc->pool->mapping_pool); } /*----------------------------------------------------------------*/ @@ -1092,7 +1092,7 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) metadata_operation_failed(pool, "dm_thin_remove_range", r); bio_io_error(m->bio); cell_defer_no_holder(tc, m->cell); - mempool_free(m, pool->mapping_pool); + mempool_free(m, &pool->mapping_pool); return; } @@ -1105,7 +1105,7 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) metadata_operation_failed(pool, "dm_pool_inc_data_range", r); bio_io_error(m->bio); cell_defer_no_holder(tc, m->cell); - mempool_free(m, pool->mapping_pool); + mempool_free(m, &pool->mapping_pool); return; } @@ -1150,7 +1150,7 @@ static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m) bio_endio(m->bio); cell_defer_no_holder(tc, m->cell); - mempool_free(m, pool->mapping_pool); + mempool_free(m, &pool->mapping_pool); } static void process_prepared(struct pool *pool, struct list_head *head, @@ -1196,7 +1196,7 @@ static int ensure_next_mapping(struct pool *pool) if (pool->next_mapping) return 0; - pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC); + pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC); return pool->next_mapping ? 0 : -ENOMEM; } @@ -2835,8 +2835,8 @@ static void __pool_destroy(struct pool *pool) destroy_workqueue(pool->wq); if (pool->next_mapping) - mempool_free(pool->next_mapping, pool->mapping_pool); - mempool_destroy(pool->mapping_pool); + mempool_free(pool->next_mapping, &pool->mapping_pool); + mempool_exit(&pool->mapping_pool); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); @@ -2931,11 +2931,11 @@ static struct pool *pool_create(struct mapped_device *pool_md, } pool->next_mapping = NULL; - pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE, - _new_mapping_cache); - if (!pool->mapping_pool) { + r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE, + _new_mapping_cache); + if (r) { *error = "Error creating pool's mapping mempool"; - err_p = ERR_PTR(-ENOMEM); + err_p = ERR_PTR(r); goto bad_mapping_pool; } @@ -2955,7 +2955,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; bad_sort_array: - mempool_destroy(pool->mapping_pool); + mempool_exit(&pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index e13f90832b6b..86405869f1af 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -309,13 +309,13 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) unsigned n; if (!fio->rs) - fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO); + fio->rs = mempool_alloc(&v->fec->rs_pool, GFP_NOIO); fec_for_each_prealloc_buffer(n) { if (fio->bufs[n]) continue; - fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOWAIT); + fio->bufs[n] = mempool_alloc(&v->fec->prealloc_pool, GFP_NOWAIT); if (unlikely(!fio->bufs[n])) { DMERR("failed to allocate FEC buffer"); return -ENOMEM; @@ -327,7 +327,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) if (fio->bufs[n]) continue; - fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOWAIT); + fio->bufs[n] = mempool_alloc(&v->fec->extra_pool, GFP_NOWAIT); /* we can manage with even one buffer if necessary */ if (unlikely(!fio->bufs[n])) break; @@ -335,7 +335,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) fio->nbufs = n; if (!fio->output) - fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO); + fio->output = mempool_alloc(&v->fec->output_pool, GFP_NOIO); return 0; } @@ -493,15 +493,15 @@ void verity_fec_finish_io(struct dm_verity_io *io) if (!verity_fec_is_enabled(io->v)) return; - mempool_free(fio->rs, f->rs_pool); + mempool_free(fio->rs, &f->rs_pool); fec_for_each_prealloc_buffer(n) - mempool_free(fio->bufs[n], f->prealloc_pool); + mempool_free(fio->bufs[n], &f->prealloc_pool); fec_for_each_extra_buffer(fio, n) - mempool_free(fio->bufs[n], f->extra_pool); + mempool_free(fio->bufs[n], &f->extra_pool); - mempool_free(fio->output, f->output_pool); + mempool_free(fio->output, &f->output_pool); } /* @@ -549,9 +549,9 @@ void verity_fec_dtr(struct dm_verity *v) if (!verity_fec_is_enabled(v)) goto out; - mempool_destroy(f->rs_pool); - mempool_destroy(f->prealloc_pool); - mempool_destroy(f->extra_pool); + mempool_exit(&f->rs_pool); + mempool_exit(&f->prealloc_pool); + mempool_exit(&f->extra_pool); kmem_cache_destroy(f->cache); if (f->data_bufio) @@ -675,6 +675,7 @@ int verity_fec_ctr(struct dm_verity *v) struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; u64 hash_blocks; + int ret; if (!verity_fec_is_enabled(v)) { verity_fec_dtr(v); @@ -770,11 +771,11 @@ int verity_fec_ctr(struct dm_verity *v) } /* Preallocate an rs_control structure for each worker thread */ - f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc, - fec_rs_free, (void *) v); - if (!f->rs_pool) { + ret = mempool_init(&f->rs_pool, num_online_cpus(), fec_rs_alloc, + fec_rs_free, (void *) v); + if (ret) { ti->error = "Cannot allocate RS pool"; - return -ENOMEM; + return ret; } f->cache = kmem_cache_create("dm_verity_fec_buffers", @@ -786,26 +787,26 @@ int verity_fec_ctr(struct dm_verity *v) } /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */ - f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() * - DM_VERITY_FEC_BUF_PREALLOC, - f->cache); - if (!f->prealloc_pool) { + ret = mempool_init_slab_pool(&f->prealloc_pool, num_online_cpus() * + DM_VERITY_FEC_BUF_PREALLOC, + f->cache); + if (ret) { ti->error = "Cannot allocate FEC buffer prealloc pool"; - return -ENOMEM; + return ret; } - f->extra_pool = mempool_create_slab_pool(0, f->cache); - if (!f->extra_pool) { + ret = mempool_init_slab_pool(&f->extra_pool, 0, f->cache); + if (ret) { ti->error = "Cannot allocate FEC buffer extra pool"; - return -ENOMEM; + return ret; } /* Preallocate an output buffer for each thread */ - f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(), - 1 << v->data_dev_block_bits); - if (!f->output_pool) { + ret = mempool_init_kmalloc_pool(&f->output_pool, num_online_cpus(), + 1 << v->data_dev_block_bits); + if (ret) { ti->error = "Cannot allocate FEC output pool"; - return -ENOMEM; + return ret; } /* Reserve space for our per-bio data */ diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index bb31ce87a933..6ad803b2b36c 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -46,10 +46,10 @@ struct dm_verity_fec { sector_t hash_blocks; /* blocks covered after v->hash_start */ unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */ unsigned char rsn; /* N of RS(M, N) */ - mempool_t *rs_pool; /* mempool for fio->rs */ - mempool_t *prealloc_pool; /* mempool for preallocated buffers */ - mempool_t *extra_pool; /* mempool for extra buffers */ - mempool_t *output_pool; /* mempool for output */ + mempool_t rs_pool; /* mempool for fio->rs */ + mempool_t prealloc_pool; /* mempool for preallocated buffers */ + mempool_t extra_pool; /* mempool for extra buffers */ + mempool_t output_pool; /* mempool for output */ struct kmem_cache *cache; /* cache for buffers */ }; diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index e73b0776683c..30602d15ad9a 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -57,7 +57,7 @@ struct dmz_target { struct workqueue_struct *chunk_wq; /* For cloned BIOs to zones */ - struct bio_set *bio_set; + struct bio_set bio_set; /* For flush */ spinlock_t flush_lock; @@ -121,7 +121,7 @@ static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone, } /* Partial BIO: we need to clone the BIO */ - clone = bio_clone_fast(bio, GFP_NOIO, dmz->bio_set); + clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set); if (!clone) return -ENOMEM; @@ -779,10 +779,9 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift; /* Zone BIO */ - dmz->bio_set = bioset_create(DMZ_MIN_BIOS, 0, 0); - if (!dmz->bio_set) { + ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0); + if (ret) { ti->error = "Create BIO set failed"; - ret = -ENOMEM; goto err_meta; } @@ -828,7 +827,7 @@ err_cwq: destroy_workqueue(dmz->chunk_wq); err_bio: mutex_destroy(&dmz->chunk_lock); - bioset_free(dmz->bio_set); + bioset_exit(&dmz->bio_set); err_meta: dmz_dtr_metadata(dmz->metadata); err_dev: @@ -858,7 +857,7 @@ static void dmz_dtr(struct dm_target *ti) dmz_dtr_metadata(dmz->metadata); - bioset_free(dmz->bio_set); + bioset_exit(&dmz->bio_set); dmz_put_zoned_device(ti); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0a7b0107ca78..98dff36b89a3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -148,8 +148,8 @@ static int dm_numa_node = DM_NUMA_NODE; * For mempools pre-allocation at the table loading time. */ struct dm_md_mempools { - struct bio_set *bs; - struct bio_set *io_bs; + struct bio_set bs; + struct bio_set io_bs; }; struct table_device { @@ -537,7 +537,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct dm_target_io *tio; struct bio *clone; - clone = bio_alloc_bioset(GFP_NOIO, 0, md->io_bs); + clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs); if (!clone) return NULL; @@ -572,7 +572,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t /* the dm_target_io embedded in ci->io is available */ tio = &ci->io->tio; } else { - struct bio *clone = bio_alloc_bioset(gfp_mask, 0, ci->io->md->bs); + struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs); if (!clone) return NULL; @@ -1583,7 +1583,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * won't be affected by this reassignment. */ struct bio *b = bio_clone_bioset(bio, GFP_NOIO, - md->queue->bio_split); + &md->queue->bio_split); ci.io->orig_bio = b; bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9); bio_chain(b, bio); @@ -1785,10 +1785,8 @@ static void cleanup_mapped_device(struct mapped_device *md) destroy_workqueue(md->wq); if (md->kworker_task) kthread_stop(md->kworker_task); - if (md->bs) - bioset_free(md->bs); - if (md->io_bs) - bioset_free(md->io_bs); + bioset_exit(&md->bs); + bioset_exit(&md->io_bs); if (md->dax_dev) { kill_dax(md->dax_dev); @@ -1965,16 +1963,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) * If so, reload bioset because front_pad may have changed * because a different table was loaded. */ - if (md->bs) { - bioset_free(md->bs); - md->bs = NULL; - } - if (md->io_bs) { - bioset_free(md->io_bs); - md->io_bs = NULL; - } + bioset_exit(&md->bs); + bioset_exit(&md->io_bs); - } else if (md->bs) { + } else if (bioset_initialized(&md->bs)) { /* * There's no need to reload with request-based dm * because the size of front_pad doesn't change. @@ -1986,12 +1978,14 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) goto out; } - BUG_ON(!p || md->bs || md->io_bs); + BUG_ON(!p || + bioset_initialized(&md->bs) || + bioset_initialized(&md->io_bs)); md->bs = p->bs; - p->bs = NULL; + memset(&p->bs, 0, sizeof(p->bs)); md->io_bs = p->io_bs; - p->io_bs = NULL; + memset(&p->io_bs, 0, sizeof(p->io_bs)); out: /* mempool bind completed, no longer need any mempools in the table */ dm_table_free_md_mempools(t); @@ -2905,6 +2899,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); unsigned int pool_size = 0; unsigned int front_pad, io_front_pad; + int ret; if (!pools) return NULL; @@ -2916,10 +2911,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio); - pools->io_bs = bioset_create(pool_size, io_front_pad, 0); - if (!pools->io_bs) + ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0); + if (ret) goto out; - if (integrity && bioset_integrity_create(pools->io_bs, pool_size)) + if (integrity && bioset_integrity_create(&pools->io_bs, pool_size)) goto out; break; case DM_TYPE_REQUEST_BASED: @@ -2932,11 +2927,11 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu BUG(); } - pools->bs = bioset_create(pool_size, front_pad, 0); - if (!pools->bs) + ret = bioset_init(&pools->bs, pool_size, front_pad, 0); + if (ret) goto out; - if (integrity && bioset_integrity_create(pools->bs, pool_size)) + if (integrity && bioset_integrity_create(&pools->bs, pool_size)) goto out; return pools; @@ -2952,10 +2947,8 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) if (!pools) return; - if (pools->bs) - bioset_free(pools->bs); - if (pools->io_bs) - bioset_free(pools->io_bs); + bioset_exit(&pools->bs); + bioset_exit(&pools->io_bs); kfree(pools); } diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c index 38264b38420f..c2fdf899de14 100644 --- a/drivers/md/md-faulty.c +++ b/drivers/md/md-faulty.c @@ -214,7 +214,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio) } } if (failit) { - struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); + struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); bio_set_dev(b, conf->rdev->bdev); b->bi_private = bio; diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 4964323d936b..d45c697c0ebe 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -269,7 +269,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) if (unlikely(bio_end_sector(bio) > end_sector)) { /* This bio crosses a device boundary, so we have to split it */ struct bio *split = bio_split(bio, end_sector - bio_sector, - GFP_NOIO, mddev->bio_set); + GFP_NOIO, &mddev->bio_set); bio_chain(split, bio); generic_make_request(bio); bio = split; diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 0a7e99d62c69..f71fcdb9b39c 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -80,7 +80,7 @@ static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status) bio->bi_status = status; bio_endio(bio); - mempool_free(mp_bh, conf->pool); + mempool_free(mp_bh, &conf->pool); } static void multipath_end_request(struct bio *bio) @@ -117,7 +117,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) return true; } - mp_bh = mempool_alloc(conf->pool, GFP_NOIO); + mp_bh = mempool_alloc(&conf->pool, GFP_NOIO); mp_bh->master_bio = bio; mp_bh->mddev = mddev; @@ -125,7 +125,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { bio_io_error(bio); - mempool_free(mp_bh, conf->pool); + mempool_free(mp_bh, &conf->pool); return true; } multipath = conf->multipaths + mp_bh->path; @@ -378,6 +378,7 @@ static int multipath_run (struct mddev *mddev) struct multipath_info *disk; struct md_rdev *rdev; int working_disks; + int ret; if (md_check_no_bitmap(mddev)) return -EINVAL; @@ -431,9 +432,9 @@ static int multipath_run (struct mddev *mddev) } mddev->degraded = conf->raid_disks - working_disks; - conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS, - sizeof(struct multipath_bh)); - if (conf->pool == NULL) + ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS, + sizeof(struct multipath_bh)); + if (ret) goto out_free_conf; mddev->thread = md_register_thread(multipathd, mddev, @@ -455,7 +456,7 @@ static int multipath_run (struct mddev *mddev) return 0; out_free_conf: - mempool_destroy(conf->pool); + mempool_exit(&conf->pool); kfree(conf->multipaths); kfree(conf); mddev->private = NULL; @@ -467,7 +468,7 @@ static void multipath_free(struct mddev *mddev, void *priv) { struct mpconf *conf = priv; - mempool_destroy(conf->pool); + mempool_exit(&conf->pool); kfree(conf->multipaths); kfree(conf); } diff --git a/drivers/md/md-multipath.h b/drivers/md/md-multipath.h index 0adb941f485a..b3099e5fc4d7 100644 --- a/drivers/md/md-multipath.h +++ b/drivers/md/md-multipath.h @@ -13,7 +13,7 @@ struct mpconf { spinlock_t device_lock; struct list_head retry_list; - mempool_t *pool; + mempool_t pool; }; /* diff --git a/drivers/md/md.c b/drivers/md/md.c index c208c01f63a5..fc692b7128bb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -193,10 +193,10 @@ struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, { struct bio *b; - if (!mddev || !mddev->bio_set) + if (!mddev || !bioset_initialized(&mddev->bio_set)) return bio_alloc(gfp_mask, nr_iovecs); - b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set); + b = bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set); if (!b) return NULL; return b; @@ -205,10 +205,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev); static struct bio *md_bio_alloc_sync(struct mddev *mddev) { - if (!mddev || !mddev->sync_set) + if (!mddev || !bioset_initialized(&mddev->sync_set)) return bio_alloc(GFP_NOIO, 1); - return bio_alloc_bioset(GFP_NOIO, 1, mddev->sync_set); + return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set); } /* @@ -510,7 +510,10 @@ static void mddev_delayed_delete(struct work_struct *ws); static void mddev_put(struct mddev *mddev) { - struct bio_set *bs = NULL, *sync_bs = NULL; + struct bio_set bs, sync_bs; + + memset(&bs, 0, sizeof(bs)); + memset(&sync_bs, 0, sizeof(sync_bs)); if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; @@ -521,8 +524,8 @@ static void mddev_put(struct mddev *mddev) list_del_init(&mddev->all_mddevs); bs = mddev->bio_set; sync_bs = mddev->sync_set; - mddev->bio_set = NULL; - mddev->sync_set = NULL; + memset(&mddev->bio_set, 0, sizeof(mddev->bio_set)); + memset(&mddev->sync_set, 0, sizeof(mddev->sync_set)); if (mddev->gendisk) { /* We did a probe so need to clean up. Call * queue_work inside the spinlock so that @@ -535,10 +538,8 @@ static void mddev_put(struct mddev *mddev) kfree(mddev); } spin_unlock(&all_mddevs_lock); - if (bs) - bioset_free(bs); - if (sync_bs) - bioset_free(sync_bs); + bioset_exit(&bs); + bioset_exit(&sync_bs); } static void md_safemode_timeout(struct timer_list *t); @@ -2123,7 +2124,7 @@ int md_integrity_register(struct mddev *mddev) bdev_get_integrity(reference->bdev)); pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); - if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) { + if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) { pr_err("md: failed to create integrity pool for %s\n", mdname(mddev)); return -EINVAL; @@ -5497,17 +5498,15 @@ int md_run(struct mddev *mddev) sysfs_notify_dirent_safe(rdev->sysfs_state); } - if (mddev->bio_set == NULL) { - mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!mddev->bio_set) - return -ENOMEM; + if (!bioset_initialized(&mddev->bio_set)) { + err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (err) + return err; } - if (mddev->sync_set == NULL) { - mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!mddev->sync_set) { - err = -ENOMEM; + if (!bioset_initialized(&mddev->sync_set)) { + err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (err) goto abort; - } } spin_lock(&pers_lock); @@ -5668,14 +5667,8 @@ int md_run(struct mddev *mddev) return 0; abort: - if (mddev->bio_set) { - bioset_free(mddev->bio_set); - mddev->bio_set = NULL; - } - if (mddev->sync_set) { - bioset_free(mddev->sync_set); - mddev->sync_set = NULL; - } + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); return err; } @@ -5888,14 +5881,8 @@ void md_stop(struct mddev *mddev) * This is called from dm-raid */ __md_stop(mddev); - if (mddev->bio_set) { - bioset_free(mddev->bio_set); - mddev->bio_set = NULL; - } - if (mddev->sync_set) { - bioset_free(mddev->sync_set); - mddev->sync_set = NULL; - } + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); } EXPORT_SYMBOL_GPL(md_stop); diff --git a/drivers/md/md.h b/drivers/md/md.h index fbc925cce810..3507cab22cb6 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -452,8 +452,8 @@ struct mddev { struct attribute_group *to_remove; - struct bio_set *bio_set; - struct bio_set *sync_set; /* for sync operations like + struct bio_set bio_set; + struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 584c10347267..65ae47a02218 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -479,7 +479,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) if (bio_end_sector(bio) > zone->zone_end) { struct bio *split = bio_split(bio, zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO, - mddev->bio_set); + &mddev->bio_set); bio_chain(split, bio); generic_make_request(bio); bio = split; @@ -582,7 +582,8 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) sector = bio_sector; if (sectors < bio_sectors(bio)) { - struct bio *split = bio_split(bio, sectors, GFP_NOIO, mddev->bio_set); + struct bio *split = bio_split(bio, sectors, GFP_NOIO, + &mddev->bio_set); bio_chain(split, bio); generic_make_request(bio); bio = split; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e9e3308cb0a7..bad28520719b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -221,7 +221,7 @@ static void free_r1bio(struct r1bio *r1_bio) struct r1conf *conf = r1_bio->mddev->private; put_all_bios(conf, r1_bio); - mempool_free(r1_bio, conf->r1bio_pool); + mempool_free(r1_bio, &conf->r1bio_pool); } static void put_buf(struct r1bio *r1_bio) @@ -236,7 +236,7 @@ static void put_buf(struct r1bio *r1_bio) rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); } - mempool_free(r1_bio, conf->r1buf_pool); + mempool_free(r1_bio, &conf->r1buf_pool); lower_barrier(conf, sect); } @@ -1178,7 +1178,7 @@ alloc_r1bio(struct mddev *mddev, struct bio *bio) struct r1conf *conf = mddev->private; struct r1bio *r1_bio; - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + r1_bio = mempool_alloc(&conf->r1bio_pool, GFP_NOIO); /* Ensure no bio records IO_BLOCKED */ memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0])); init_r1bio(r1_bio, mddev, bio); @@ -1268,7 +1268,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, - gfp, conf->bio_split); + gfp, &conf->bio_split); bio_chain(split, bio); generic_make_request(bio); bio = split; @@ -1278,7 +1278,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, r1_bio->read_disk = rdisk; - read_bio = bio_clone_fast(bio, gfp, mddev->bio_set); + read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); r1_bio->bios[rdisk] = read_bio; @@ -1439,7 +1439,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, - GFP_NOIO, conf->bio_split); + GFP_NOIO, &conf->bio_split); bio_chain(split, bio); generic_make_request(bio); bio = split; @@ -1479,9 +1479,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (r1_bio->behind_master_bio) mbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, mddev->bio_set); + GFP_NOIO, &mddev->bio_set); else - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); + mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); if (r1_bio->behind_master_bio) { if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) @@ -1657,8 +1657,7 @@ static void close_sync(struct r1conf *conf) _allow_barrier(conf, idx); } - mempool_destroy(conf->r1buf_pool); - conf->r1buf_pool = NULL; + mempool_exit(&conf->r1buf_pool); } static int raid1_spare_active(struct mddev *mddev) @@ -2348,10 +2347,10 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { wbio = bio_clone_fast(r1_bio->behind_master_bio, GFP_NOIO, - mddev->bio_set); + &mddev->bio_set); } else { wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, - mddev->bio_set); + &mddev->bio_set); } bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); @@ -2564,17 +2563,15 @@ static int init_resync(struct r1conf *conf) int buffs; buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; - BUG_ON(conf->r1buf_pool); - conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free, - conf->poolinfo); - if (!conf->r1buf_pool) - return -ENOMEM; - return 0; + BUG_ON(mempool_initialized(&conf->r1buf_pool)); + + return mempool_init(&conf->r1buf_pool, buffs, r1buf_pool_alloc, + r1buf_pool_free, conf->poolinfo); } static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) { - struct r1bio *r1bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); + struct r1bio *r1bio = mempool_alloc(&conf->r1buf_pool, GFP_NOIO); struct resync_pages *rps; struct bio *bio; int i; @@ -2617,7 +2614,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, int idx = sector_to_idx(sector_nr); int page_idx = 0; - if (!conf->r1buf_pool) + if (!mempool_initialized(&conf->r1buf_pool)) if (init_resync(conf)) return 0; @@ -2953,14 +2950,13 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (!conf->poolinfo) goto abort; conf->poolinfo->raid_disks = mddev->raid_disks * 2; - conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, - r1bio_pool_free, - conf->poolinfo); - if (!conf->r1bio_pool) + err = mempool_init(&conf->r1bio_pool, NR_RAID1_BIOS, r1bio_pool_alloc, + r1bio_pool_free, conf->poolinfo); + if (err) goto abort; - conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!conf->bio_split) + err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); + if (err) goto abort; conf->poolinfo->mddev = mddev; @@ -3033,7 +3029,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) abort: if (conf) { - mempool_destroy(conf->r1bio_pool); + mempool_exit(&conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -3041,8 +3037,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) kfree(conf->nr_waiting); kfree(conf->nr_queued); kfree(conf->barrier); - if (conf->bio_split) - bioset_free(conf->bio_split); + bioset_exit(&conf->bio_split); kfree(conf); } return ERR_PTR(err); @@ -3144,7 +3139,7 @@ static void raid1_free(struct mddev *mddev, void *priv) { struct r1conf *conf = priv; - mempool_destroy(conf->r1bio_pool); + mempool_exit(&conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -3152,8 +3147,7 @@ static void raid1_free(struct mddev *mddev, void *priv) kfree(conf->nr_waiting); kfree(conf->nr_queued); kfree(conf->barrier); - if (conf->bio_split) - bioset_free(conf->bio_split); + bioset_exit(&conf->bio_split); kfree(conf); } @@ -3199,13 +3193,17 @@ static int raid1_reshape(struct mddev *mddev) * At the same time, we "pack" the devices so that all the missing * devices have the higher raid_disk numbers. */ - mempool_t *newpool, *oldpool; + mempool_t newpool, oldpool; struct pool_info *newpoolinfo; struct raid1_info *newmirrors; struct r1conf *conf = mddev->private; int cnt, raid_disks; unsigned long flags; int d, d2; + int ret; + + memset(&newpool, 0, sizeof(newpool)); + memset(&oldpool, 0, sizeof(oldpool)); /* Cannot change chunk_size, layout, or level */ if (mddev->chunk_sectors != mddev->new_chunk_sectors || @@ -3237,17 +3235,17 @@ static int raid1_reshape(struct mddev *mddev) newpoolinfo->mddev = mddev; newpoolinfo->raid_disks = raid_disks * 2; - newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, - r1bio_pool_free, newpoolinfo); - if (!newpool) { + ret = mempool_init(&newpool, NR_RAID1_BIOS, r1bio_pool_alloc, + r1bio_pool_free, newpoolinfo); + if (ret) { kfree(newpoolinfo); - return -ENOMEM; + return ret; } newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2, GFP_KERNEL); if (!newmirrors) { kfree(newpoolinfo); - mempool_destroy(newpool); + mempool_exit(&newpool); return -ENOMEM; } @@ -3287,7 +3285,7 @@ static int raid1_reshape(struct mddev *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - mempool_destroy(oldpool); + mempool_exit(&oldpool); return 0; } diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index eb84bc68e2fd..e7ccad898736 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -118,10 +118,10 @@ struct r1conf { * mempools - it changes when the array grows or shrinks */ struct pool_info *poolinfo; - mempool_t *r1bio_pool; - mempool_t *r1buf_pool; + mempool_t r1bio_pool; + mempool_t r1buf_pool; - struct bio_set *bio_split; + struct bio_set bio_split; /* temporary buffer to synchronous IO when attempting to repair * a read error. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3c60774c8430..37d4b236b81b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -291,14 +291,14 @@ static void free_r10bio(struct r10bio *r10_bio) struct r10conf *conf = r10_bio->mddev->private; put_all_bios(conf, r10_bio); - mempool_free(r10_bio, conf->r10bio_pool); + mempool_free(r10_bio, &conf->r10bio_pool); } static void put_buf(struct r10bio *r10_bio) { struct r10conf *conf = r10_bio->mddev->private; - mempool_free(r10_bio, conf->r10buf_pool); + mempool_free(r10_bio, &conf->r10buf_pool); lower_barrier(conf); } @@ -1204,7 +1204,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, (unsigned long long)r10_bio->sector); if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, - gfp, conf->bio_split); + gfp, &conf->bio_split); bio_chain(split, bio); generic_make_request(bio); bio = split; @@ -1213,7 +1213,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } slot = r10_bio->read_slot; - read_bio = bio_clone_fast(bio, gfp, mddev->bio_set); + read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].rdev = rdev; @@ -1261,7 +1261,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, } else rdev = conf->mirrors[devnum].rdev; - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); + mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); if (replacement) r10_bio->devs[n_copy].repl_bio = mbio; else @@ -1509,7 +1509,7 @@ retry_write: if (r10_bio->sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, r10_bio->sectors, - GFP_NOIO, conf->bio_split); + GFP_NOIO, &conf->bio_split); bio_chain(split, bio); generic_make_request(bio); bio = split; @@ -1533,7 +1533,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) struct r10conf *conf = mddev->private; struct r10bio *r10_bio; - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); r10_bio->master_bio = bio; r10_bio->sectors = sectors; @@ -1732,8 +1732,7 @@ static void close_sync(struct r10conf *conf) wait_barrier(conf); allow_barrier(conf); - mempool_destroy(conf->r10buf_pool); - conf->r10buf_pool = NULL; + mempool_exit(&conf->r10buf_pool); } static int raid10_spare_active(struct mddev *mddev) @@ -2583,7 +2582,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ - wbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); + wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + @@ -2816,25 +2815,25 @@ static void raid10d(struct md_thread *thread) static int init_resync(struct r10conf *conf) { - int buffs; - int i; + int ret, buffs, i; buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; - BUG_ON(conf->r10buf_pool); + BUG_ON(mempool_initialized(&conf->r10buf_pool)); conf->have_replacement = 0; for (i = 0; i < conf->geo.raid_disks; i++) if (conf->mirrors[i].replacement) conf->have_replacement = 1; - conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); - if (!conf->r10buf_pool) - return -ENOMEM; + ret = mempool_init(&conf->r10buf_pool, buffs, + r10buf_pool_alloc, r10buf_pool_free, conf); + if (ret) + return ret; conf->next_resync = 0; return 0; } static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf) { - struct r10bio *r10bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); + struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO); struct rsync_pages *rp; struct bio *bio; int nalloc; @@ -2945,7 +2944,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, sector_t chunk_mask = conf->geo.chunk_mask; int page_idx = 0; - if (!conf->r10buf_pool) + if (!mempool_initialized(&conf->r10buf_pool)) if (init_resync(conf)) return 0; @@ -3699,13 +3698,13 @@ static struct r10conf *setup_conf(struct mddev *mddev) conf->geo = geo; conf->copies = copies; - conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, - r10bio_pool_free, conf); - if (!conf->r10bio_pool) + err = mempool_init(&conf->r10bio_pool, NR_RAID10_BIOS, r10bio_pool_alloc, + r10bio_pool_free, conf); + if (err) goto out; - conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!conf->bio_split) + err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); + if (err) goto out; calc_sectors(conf, mddev->dev_sectors); @@ -3733,6 +3732,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) init_waitqueue_head(&conf->wait_barrier); atomic_set(&conf->nr_pending, 0); + err = -ENOMEM; conf->thread = md_register_thread(raid10d, mddev, "raid10"); if (!conf->thread) goto out; @@ -3742,11 +3742,10 @@ static struct r10conf *setup_conf(struct mddev *mddev) out: if (conf) { - mempool_destroy(conf->r10bio_pool); + mempool_exit(&conf->r10bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); - if (conf->bio_split) - bioset_free(conf->bio_split); + bioset_exit(&conf->bio_split); kfree(conf); } return ERR_PTR(err); @@ -3953,7 +3952,7 @@ static int raid10_run(struct mddev *mddev) out_free_conf: md_unregister_thread(&mddev->thread); - mempool_destroy(conf->r10bio_pool); + mempool_exit(&conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); @@ -3966,13 +3965,12 @@ static void raid10_free(struct mddev *mddev, void *priv) { struct r10conf *conf = priv; - mempool_destroy(conf->r10bio_pool); + mempool_exit(&conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf->mirrors_old); kfree(conf->mirrors_new); - if (conf->bio_split) - bioset_free(conf->bio_split); + bioset_exit(&conf->bio_split); kfree(conf); } @@ -4543,7 +4541,7 @@ read_more: * on all the target devices. */ // FIXME - mempool_free(r10_bio, conf->r10buf_pool); + mempool_free(r10_bio, &conf->r10buf_pool); set_bit(MD_RECOVERY_INTR, &mddev->recovery); return sectors_done; } diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index e2e8840de9bf..d3eaaf3eb1bc 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -93,10 +93,10 @@ struct r10conf { */ wait_queue_head_t wait_barrier; - mempool_t *r10bio_pool; - mempool_t *r10buf_pool; + mempool_t r10bio_pool; + mempool_t r10buf_pool; struct page *tmppage; - struct bio_set *bio_split; + struct bio_set bio_split; /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 3c65f52b68f5..2b775abf377b 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -125,9 +125,9 @@ struct r5l_log { struct list_head no_mem_stripes; /* pending stripes, -ENOMEM */ struct kmem_cache *io_kc; - mempool_t *io_pool; - struct bio_set *bs; - mempool_t *meta_pool; + mempool_t io_pool; + struct bio_set bs; + mempool_t meta_pool; struct md_thread *reclaim_thread; unsigned long reclaim_target; /* number of space that need to be @@ -579,7 +579,7 @@ static void r5l_log_endio(struct bio *bio) md_error(log->rdev->mddev, log->rdev); bio_put(bio); - mempool_free(io->meta_page, log->meta_pool); + mempool_free(io->meta_page, &log->meta_pool); spin_lock_irqsave(&log->io_list_lock, flags); __r5l_set_io_unit_state(io, IO_UNIT_IO_END); @@ -748,7 +748,7 @@ static void r5l_submit_current_io(struct r5l_log *log) static struct bio *r5l_bio_alloc(struct r5l_log *log) { - struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs); + struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio_set_dev(bio, log->rdev->bdev); @@ -780,7 +780,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log) struct r5l_io_unit *io; struct r5l_meta_block *block; - io = mempool_alloc(log->io_pool, GFP_ATOMIC); + io = mempool_alloc(&log->io_pool, GFP_ATOMIC); if (!io) return NULL; memset(io, 0, sizeof(*io)); @@ -791,7 +791,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log) bio_list_init(&io->flush_barriers); io->state = IO_UNIT_RUNNING; - io->meta_page = mempool_alloc(log->meta_pool, GFP_NOIO); + io->meta_page = mempool_alloc(&log->meta_pool, GFP_NOIO); block = page_address(io->meta_page); clear_page(block); block->magic = cpu_to_le32(R5LOG_MAGIC); @@ -1223,7 +1223,7 @@ static bool r5l_complete_finished_ios(struct r5l_log *log) log->next_checkpoint = io->log_start; list_del(&io->log_sibling); - mempool_free(io, log->io_pool); + mempool_free(io, &log->io_pool); r5l_run_no_mem_stripe(log); found = true; @@ -1647,7 +1647,7 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, { struct page *page; - ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, log->bs); + ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs); if (!ctx->ra_bio) return -ENOMEM; @@ -3066,6 +3066,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) struct request_queue *q = bdev_get_queue(rdev->bdev); struct r5l_log *log; char b[BDEVNAME_SIZE]; + int ret; pr_debug("md/raid:%s: using device %s as journal\n", mdname(conf->mddev), bdevname(rdev->bdev, b)); @@ -3111,16 +3112,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log->io_kc) goto io_kc; - log->io_pool = mempool_create_slab_pool(R5L_POOL_SIZE, log->io_kc); - if (!log->io_pool) + ret = mempool_init_slab_pool(&log->io_pool, R5L_POOL_SIZE, log->io_kc); + if (ret) goto io_pool; - log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!log->bs) + ret = bioset_init(&log->bs, R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (ret) goto io_bs; - log->meta_pool = mempool_create_page_pool(R5L_POOL_SIZE, 0); - if (!log->meta_pool) + ret = mempool_init_page_pool(&log->meta_pool, R5L_POOL_SIZE, 0); + if (ret) goto out_mempool; spin_lock_init(&log->tree_lock); @@ -3155,11 +3156,11 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) rcu_assign_pointer(conf->log, NULL); md_unregister_thread(&log->reclaim_thread); reclaim_thread: - mempool_destroy(log->meta_pool); + mempool_exit(&log->meta_pool); out_mempool: - bioset_free(log->bs); + bioset_exit(&log->bs); io_bs: - mempool_destroy(log->io_pool); + mempool_exit(&log->io_pool); io_pool: kmem_cache_destroy(log->io_kc); io_kc: @@ -3178,9 +3179,9 @@ void r5l_exit_log(struct r5conf *conf) wake_up(&conf->mddev->sb_wait); flush_work(&log->disable_writeback_work); md_unregister_thread(&log->reclaim_thread); - mempool_destroy(log->meta_pool); - bioset_free(log->bs); - mempool_destroy(log->io_pool); + mempool_exit(&log->meta_pool); + bioset_exit(&log->bs); + mempool_exit(&log->io_pool); kmem_cache_destroy(log->io_kc); kfree(log); } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 42890a08375b..3a7c36326589 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -105,9 +105,9 @@ struct ppl_conf { atomic64_t seq; /* current log write sequence number */ struct kmem_cache *io_kc; - mempool_t *io_pool; - struct bio_set *bs; - struct bio_set *flush_bs; + mempool_t io_pool; + struct bio_set bs; + struct bio_set flush_bs; /* used only for recovery */ int recovered_entries; @@ -244,7 +244,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, struct ppl_header *pplhdr; struct page *header_page; - io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT); + io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT); if (!io) return NULL; @@ -503,7 +503,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) struct bio *prev = bio; bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, - ppl_conf->bs); + &ppl_conf->bs); bio->bi_opf = prev->bi_opf; bio_copy_dev(bio, prev); bio->bi_iter.bi_sector = bio_end_sector(prev); @@ -570,7 +570,7 @@ static void ppl_io_unit_finished(struct ppl_io_unit *io) list_del(&io->log_sibling); spin_unlock(&log->io_list_lock); - mempool_free(io, ppl_conf->io_pool); + mempool_free(io, &ppl_conf->io_pool); spin_lock(&ppl_conf->no_mem_stripes_lock); if (!list_empty(&ppl_conf->no_mem_stripes)) { @@ -642,7 +642,7 @@ static void ppl_do_flush(struct ppl_io_unit *io) struct bio *bio; char b[BDEVNAME_SIZE]; - bio = bio_alloc_bioset(GFP_NOIO, 0, ppl_conf->flush_bs); + bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs); bio_set_dev(bio, bdev); bio->bi_private = io; bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; @@ -1246,11 +1246,9 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf) kfree(ppl_conf->child_logs); - if (ppl_conf->bs) - bioset_free(ppl_conf->bs); - if (ppl_conf->flush_bs) - bioset_free(ppl_conf->flush_bs); - mempool_destroy(ppl_conf->io_pool); + bioset_exit(&ppl_conf->bs); + bioset_exit(&ppl_conf->flush_bs); + mempool_exit(&ppl_conf->io_pool); kmem_cache_destroy(ppl_conf->io_kc); kfree(ppl_conf); @@ -1387,24 +1385,18 @@ int ppl_init_log(struct r5conf *conf) goto err; } - ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc, - ppl_io_pool_free, ppl_conf->io_kc); - if (!ppl_conf->io_pool) { - ret = -ENOMEM; + ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc, + ppl_io_pool_free, ppl_conf->io_kc); + if (ret) goto err; - } - ppl_conf->bs = bioset_create(conf->raid_disks, 0, BIOSET_NEED_BVECS); - if (!ppl_conf->bs) { - ret = -ENOMEM; + ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS); + if (ret) goto err; - } - ppl_conf->flush_bs = bioset_create(conf->raid_disks, 0, 0); - if (!ppl_conf->flush_bs) { - ret = -ENOMEM; + ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0); + if (ret) goto err; - } ppl_conf->count = conf->raid_disks; ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log), diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index be117d0a65a8..a2e64989b01f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5192,7 +5192,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) /* * use bio_clone_fast to make a copy of the bio */ - align_bi = bio_clone_fast(raid_bio, GFP_NOIO, mddev->bio_set); + align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set); if (!align_bi) return 0; /* @@ -5277,7 +5277,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio) if (sectors < bio_sectors(raid_bio)) { struct r5conf *conf = mddev->private; - split = bio_split(raid_bio, sectors, GFP_NOIO, conf->bio_split); + split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split); bio_chain(split, raid_bio); generic_make_request(raid_bio); raid_bio = split; @@ -6773,8 +6773,7 @@ static void free_conf(struct r5conf *conf) if (conf->disks[i].extra_page) put_page(conf->disks[i].extra_page); kfree(conf->disks); - if (conf->bio_split) - bioset_free(conf->bio_split); + bioset_exit(&conf->bio_split); kfree(conf->stripe_hashtbl); kfree(conf->pending_data); kfree(conf); @@ -6853,6 +6852,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) int i; int group_cnt, worker_cnt_per_group; struct r5worker_group *new_group; + int ret; if (mddev->new_level != 5 && mddev->new_level != 4 @@ -6950,8 +6950,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; } - conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!conf->bio_split) + ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); + if (ret) goto abort; conf->mddev = mddev; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 3f8da26032ac..72e75ba6abf0 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -669,7 +669,7 @@ struct r5conf { int pool_size; /* number of disks in stripeheads in pool */ spinlock_t device_lock; struct disk_info *disks; - struct bio_set *bio_split; + struct bio_set bio_split; /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c index fca36a4910c2..d697e1ad929c 100644 --- a/drivers/media/pci/saa7164/saa7164-core.c +++ b/drivers/media/pci/saa7164/saa7164-core.c @@ -1122,23 +1122,11 @@ static int saa7164_proc_show(struct seq_file *m, void *v) return 0; } -static int saa7164_proc_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, saa7164_proc_show, NULL); -} - -static const struct file_operations saa7164_proc_fops = { - .open = saa7164_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int saa7164_proc_create(void) { struct proc_dir_entry *pe; - pe = proc_create("saa7164", S_IRUGO, NULL, &saa7164_proc_fops); + pe = proc_create_single("saa7164", S_IRUGO, NULL, saa7164_proc_show); if (!pe) return -ENOMEM; diff --git a/drivers/media/pci/zoran/videocodec.c b/drivers/media/pci/zoran/videocodec.c index 5ff23ef89215..4427ae7126e2 100644 --- a/drivers/media/pci/zoran/videocodec.c +++ b/drivers/media/pci/zoran/videocodec.c @@ -344,19 +344,6 @@ static int proc_videocodecs_show(struct seq_file *m, void *v) return 0; } - -static int proc_videocodecs_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_videocodecs_show, NULL); -} - -static const struct file_operations videocodecs_proc_fops = { - .owner = THIS_MODULE, - .open = proc_videocodecs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* ===================== */ @@ -373,7 +360,8 @@ videocodec_init (void) VIDEOCODEC_VERSION); #ifdef CONFIG_PROC_FS - videocodec_proc_entry = proc_create("videocodecs", 0, NULL, &videocodecs_proc_fops); + videocodec_proc_entry = proc_create_single("videocodecs", 0, NULL, + proc_videocodecs_show); if (!videocodec_proc_entry) { dprintk(1, KERN_ERR "videocodec: can't init procfs.\n"); } diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 57b13dfbd21e..a15181fa45f7 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2094,14 +2094,9 @@ static const struct block_device_operations msb_bdops = { static int msb_init_disk(struct memstick_dev *card) { struct msb_data *msb = memstick_get_drvdata(card); - struct memstick_host *host = card->host; int rc; - u64 limit = BLK_BOUNCE_HIGH; unsigned long capacity; - if (host->dev.dma_mask && *(host->dev.dma_mask)) - limit = *(host->dev.dma_mask); - mutex_lock(&msb_disk_lock); msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL); mutex_unlock(&msb_disk_lock); @@ -2123,7 +2118,6 @@ static int msb_init_disk(struct memstick_dev *card) msb->queue->queuedata = card; - blk_queue_bounce_limit(msb->queue, limit); blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES); blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS); blk_queue_max_segment_size(msb->queue, diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 8897962781bb..5ee932631fae 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1170,17 +1170,12 @@ static int mspro_block_init_card(struct memstick_dev *card) static int mspro_block_init_disk(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); - struct memstick_host *host = card->host; struct mspro_devinfo *dev_info = NULL; struct mspro_sys_info *sys_info = NULL; struct mspro_sys_attr *s_attr = NULL; int rc, disk_id; - u64 limit = BLK_BOUNCE_HIGH; unsigned long capacity; - if (host->dev.dma_mask && *(host->dev.dma_mask)) - limit = *(host->dev.dma_mask); - for (rc = 0; msb->attr_group.attrs[rc]; ++rc) { s_attr = mspro_from_sysfs_attr(msb->attr_group.attrs[rc]); @@ -1219,7 +1214,6 @@ static int mspro_block_init_disk(struct memstick_dev *card) msb->queue->queuedata = card; - blk_queue_bounce_limit(msb->queue, limit); blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES); blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS); blk_queue_max_segment_size(msb->queue, diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 51eb1b027963..a746ccdd630a 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -197,9 +197,9 @@ static int mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_valu static int mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init); #ifdef CONFIG_PROC_FS -static const struct file_operations mpt_summary_proc_fops; -static const struct file_operations mpt_version_proc_fops; -static const struct file_operations mpt_iocinfo_proc_fops; +static int mpt_summary_proc_show(struct seq_file *m, void *v); +static int mpt_version_proc_show(struct seq_file *m, void *v); +static int mpt_iocinfo_proc_show(struct seq_file *m, void *v); #endif static void mpt_get_fw_exp_ver(char *buf, MPT_ADAPTER *ioc); @@ -2040,8 +2040,10 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) */ dent = proc_mkdir(ioc->name, mpt_proc_root_dir); if (dent) { - proc_create_data("info", S_IRUGO, dent, &mpt_iocinfo_proc_fops, ioc); - proc_create_data("summary", S_IRUGO, dent, &mpt_summary_proc_fops, ioc); + proc_create_single_data("info", S_IRUGO, dent, + mpt_iocinfo_proc_show, ioc); + proc_create_single_data("summary", S_IRUGO, dent, + mpt_summary_proc_show, ioc); } #endif @@ -6606,8 +6608,10 @@ procmpt_create(void) if (mpt_proc_root_dir == NULL) return -ENOTDIR; - proc_create("summary", S_IRUGO, mpt_proc_root_dir, &mpt_summary_proc_fops); - proc_create("version", S_IRUGO, mpt_proc_root_dir, &mpt_version_proc_fops); + proc_create_single("summary", S_IRUGO, mpt_proc_root_dir, + mpt_summary_proc_show); + proc_create_single("version", S_IRUGO, mpt_proc_root_dir, + mpt_version_proc_show); return 0; } @@ -6646,19 +6650,6 @@ static int mpt_summary_proc_show(struct seq_file *m, void *v) return 0; } -static int mpt_summary_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, mpt_summary_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations mpt_summary_proc_fops = { - .owner = THIS_MODULE, - .open = mpt_summary_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int mpt_version_proc_show(struct seq_file *m, void *v) { u8 cb_idx; @@ -6701,19 +6692,6 @@ static int mpt_version_proc_show(struct seq_file *m, void *v) return 0; } -static int mpt_version_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, mpt_version_proc_show, NULL); -} - -static const struct file_operations mpt_version_proc_fops = { - .owner = THIS_MODULE, - .open = mpt_version_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int mpt_iocinfo_proc_show(struct seq_file *m, void *v) { MPT_ADAPTER *ioc = m->private; @@ -6793,19 +6771,6 @@ static int mpt_iocinfo_proc_show(struct seq_file *m, void *v) return 0; } - -static int mpt_iocinfo_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, mpt_iocinfo_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations mpt_iocinfo_proc_fops = { - .owner = THIS_MODULE, - .open = mpt_iocinfo_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS } */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 86503f60468f..19a5aa70ecda 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1929,7 +1929,7 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc) MPT_SCSI_HOST *hd; MPT_ADAPTER *ioc; VirtDevice *vdevice; - enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; + enum blk_eh_timer_return rc = BLK_EH_DONE; hd = shost_priv(sc->device->host); if (hd == NULL) { diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 1b52b8557034..2060d1483043 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -419,10 +419,25 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, /* Verify that EC can process command */ for (i = 0; i < len; i++) { rx_byte = rx_buf[i]; + /* + * Seeing the PAST_END, RX_BAD_DATA, or NOT_READY + * markers are all signs that the EC didn't fully + * receive our command. e.g., if the EC is flashing + * itself, it can't respond to any commands and instead + * clocks out EC_SPI_PAST_END from its SPI hardware + * buffer. Similar occurrences can happen if the AP is + * too slow to clock out data after asserting CS -- the + * EC will abort and fill its buffer with + * EC_SPI_RX_BAD_DATA. + * + * In all cases, these errors should be safe to retry. + * Report -EAGAIN and let the caller decide what to do + * about that. + */ if (rx_byte == EC_SPI_PAST_END || rx_byte == EC_SPI_RX_BAD_DATA || rx_byte == EC_SPI_NOT_READY) { - ret = -EREMOTEIO; + ret = -EAGAIN; break; } } @@ -431,7 +446,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) ret = cros_ec_spi_receive_packet(ec_dev, ec_msg->insize + sizeof(*response)); - else + else if (ret != -EAGAIN) dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); final_ret = terminate_request(ec_dev); @@ -537,10 +552,11 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, /* Verify that EC can process command */ for (i = 0; i < len; i++) { rx_byte = rx_buf[i]; + /* See comments in cros_ec_pkt_xfer_spi() */ if (rx_byte == EC_SPI_PAST_END || rx_byte == EC_SPI_RX_BAD_DATA || rx_byte == EC_SPI_NOT_READY) { - ret = -EREMOTEIO; + ret = -EAGAIN; break; } } @@ -549,7 +565,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) ret = cros_ec_spi_receive_response(ec_dev, ec_msg->insize + EC_MSG_TX_PROTO_BYTES); - else + else if (ret != -EAGAIN) dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); final_ret = terminate_request(ec_dev); diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 4f7635922394..42ea2eccaee9 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -270,16 +270,6 @@ static int options_open(struct inode *inode, struct file *file) return single_open(file, options_show, NULL); } -static int cch_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &cch_seq_ops); -} - -static int gru_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &gru_seq_ops); -} - /* *INDENT-OFF* */ static const struct file_operations statistics_fops = { .open = statistics_open, @@ -305,73 +295,30 @@ static const struct file_operations options_fops = { .release = single_release, }; -static const struct file_operations cch_fops = { - .open = cch_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -static const struct file_operations gru_fops = { - .open = gru_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static struct proc_entry { - char *name; - umode_t mode; - const struct file_operations *fops; - struct proc_dir_entry *entry; -} proc_files[] = { - {"statistics", 0644, &statistics_fops}, - {"mcs_statistics", 0644, &mcs_statistics_fops}, - {"debug_options", 0644, &options_fops}, - {"cch_status", 0444, &cch_fops}, - {"gru_status", 0444, &gru_fops}, - {NULL} -}; -/* *INDENT-ON* */ - static struct proc_dir_entry *proc_gru __read_mostly; -static int create_proc_file(struct proc_entry *p) -{ - p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); - if (!p->entry) - return -1; - return 0; -} - -static void delete_proc_files(void) -{ - struct proc_entry *p; - - if (proc_gru) { - for (p = proc_files; p->name; p++) - if (p->entry) - remove_proc_entry(p->name, proc_gru); - proc_remove(proc_gru); - } -} - int gru_proc_init(void) { - struct proc_entry *p; - proc_gru = proc_mkdir("sgi_uv/gru", NULL); - - for (p = proc_files; p->name; p++) - if (create_proc_file(p)) - goto err; + if (!proc_gru) + return -1; + if (!proc_create("statistics", 0644, proc_gru, &statistics_fops)) + goto err; + if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_fops)) + goto err; + if (!proc_create("debug_options", 0644, proc_gru, &options_fops)) + goto err; + if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops)) + goto err; + if (!proc_create_seq("gru_status", 0444, proc_gru, &gru_seq_ops)) + goto err; return 0; - err: - delete_proc_files(); + remove_proc_subtree("sgi_uv/gru", NULL); return -1; } void gru_proc_exit(void) { - delete_proc_files(); + remove_proc_subtree("sgi_uv/gru", NULL); } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9e923cd1d80e..d89e17829527 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -244,7 +244,7 @@ static ssize_t power_ro_lock_store(struct device *dev, mq = &md->queue; /* Dispatch locking to the block layer */ - req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, __GFP_RECLAIM); + req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, 0); if (IS_ERR(req)) { count = PTR_ERR(req); goto out_put; @@ -650,8 +650,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, */ mq = &md->queue; req = blk_get_request(mq->queue, - idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - __GFP_RECLAIM); + idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(req)) { err = PTR_ERR(req); goto cmd_done; @@ -721,8 +720,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, */ mq = &md->queue; req = blk_get_request(mq->queue, - idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - __GFP_RECLAIM); + idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(req)) { err = PTR_ERR(req); goto cmd_err; @@ -2485,7 +2483,7 @@ static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd, break; } - return 0; + return ret; } #ifdef CONFIG_COMPAT @@ -2750,7 +2748,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) int ret; /* Ask the block layer about the card status */ - req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return PTR_ERR(req); req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS; @@ -2786,7 +2784,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) return -ENOMEM; /* Ask the block layer for the EXT CSD */ - req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) { err = PTR_ERR(req); goto out_free; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 56e9a803db21..648eb6743ed5 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -111,8 +111,9 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) __mmc_cqe_recovery_notifier(mq); return BLK_EH_RESET_TIMER; } - /* No timeout */ - return BLK_EH_HANDLED; + /* No timeout (XXX: huh? comment doesn't make much sense) */ + blk_mq_complete_request(req); + return BLK_EH_DONE; default: /* Timeout is handled by mmc core */ return BLK_EH_RESET_TIMER; diff --git a/drivers/mmc/core/sdio_uart.c b/drivers/mmc/core/sdio_uart.c index d3c91f412b69..25e113001a3c 100644 --- a/drivers/mmc/core/sdio_uart.c +++ b/drivers/mmc/core/sdio_uart.c @@ -1008,19 +1008,6 @@ static int sdio_uart_proc_show(struct seq_file *m, void *v) return 0; } -static int sdio_uart_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, sdio_uart_proc_show, NULL); -} - -static const struct file_operations sdio_uart_proc_fops = { - .owner = THIS_MODULE, - .open = sdio_uart_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct tty_port_operations sdio_uart_port_ops = { .dtr_rts = uart_dtr_rts, .carrier_raised = uart_carrier_raised, @@ -1045,7 +1032,7 @@ static const struct tty_operations sdio_uart_ops = { .tiocmset = sdio_uart_tiocmset, .install = sdio_uart_install, .cleanup = sdio_uart_cleanup, - .proc_fops = &sdio_uart_proc_fops, + .proc_show = sdio_uart_proc_show, }; static struct tty_driver *sdio_uart_tty_driver; diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 0ef741bc515d..d0e83db42ae5 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -33,6 +33,8 @@ struct sdhci_iproc_host { const struct sdhci_iproc_data *data; u32 shadow_cmd; u32 shadow_blk; + bool is_cmd_shadowed; + bool is_blk_shadowed; }; #define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18) @@ -48,8 +50,22 @@ static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg) static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg) { - u32 val = sdhci_iproc_readl(host, (reg & ~3)); - u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff; + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host); + u32 val; + u16 word; + + if ((reg == SDHCI_TRANSFER_MODE) && iproc_host->is_cmd_shadowed) { + /* Get the saved transfer mode */ + val = iproc_host->shadow_cmd; + } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) && + iproc_host->is_blk_shadowed) { + /* Get the saved block info */ + val = iproc_host->shadow_blk; + } else { + val = sdhci_iproc_readl(host, (reg & ~3)); + } + word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff; return word; } @@ -105,13 +121,15 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg) if (reg == SDHCI_COMMAND) { /* Write the block now as we are issuing a command */ - if (iproc_host->shadow_blk != 0) { + if (iproc_host->is_blk_shadowed) { sdhci_iproc_writel(host, iproc_host->shadow_blk, SDHCI_BLOCK_SIZE); - iproc_host->shadow_blk = 0; + iproc_host->is_blk_shadowed = false; } oldval = iproc_host->shadow_cmd; - } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) { + iproc_host->is_cmd_shadowed = false; + } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) && + iproc_host->is_blk_shadowed) { /* Block size and count are stored in shadow reg */ oldval = iproc_host->shadow_blk; } else { @@ -123,9 +141,11 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg) if (reg == SDHCI_TRANSFER_MODE) { /* Save the transfer mode until the command is issued */ iproc_host->shadow_cmd = newval; + iproc_host->is_cmd_shadowed = true; } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) { /* Save the block info until the command is issued */ iproc_host->shadow_blk = newval; + iproc_host->is_blk_shadowed = true; } else { /* Command or other regular 32-bit write */ sdhci_iproc_writel(host, newval, reg & ~3); @@ -166,7 +186,7 @@ static const struct sdhci_ops sdhci_iproc_32only_ops = { static const struct sdhci_pltfm_data sdhci_iproc_cygnus_pltfm_data = { .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, - .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, + .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN | SDHCI_QUIRK2_HOST_OFF_CARD_ON, .ops = &sdhci_iproc_32only_ops, }; @@ -206,7 +226,6 @@ static const struct sdhci_iproc_data iproc_data = { .caps1 = SDHCI_DRIVER_TYPE_C | SDHCI_DRIVER_TYPE_D | SDHCI_SUPPORT_DDR50, - .mmc_caps = MMC_CAP_1_8V_DDR, }; static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = { diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 16ae4ae8e8f9..29c0bfd74e8a 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -82,7 +82,6 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr, block = blk_rq_pos(req) << 9 >> tr->blkshift; nsect = blk_rq_cur_bytes(req) >> tr->blkshift; - buf = bio_data(req->bio); if (req_op(req) == REQ_OP_FLUSH) { if (tr->flush(dev)) @@ -100,9 +99,14 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr, return BLK_STS_IOERR; return BLK_STS_OK; case REQ_OP_READ: - for (; nsect > 0; nsect--, block++, buf += tr->blksize) - if (tr->readsect(dev, block, buf)) + buf = kmap(bio_page(req->bio)) + bio_offset(req->bio); + for (; nsect > 0; nsect--, block++, buf += tr->blksize) { + if (tr->readsect(dev, block, buf)) { + kunmap(bio_page(req->bio)); return BLK_STS_IOERR; + } + } + kunmap(bio_page(req->bio)); rq_flush_dcache_pages(req); return BLK_STS_OK; case REQ_OP_WRITE: @@ -110,9 +114,14 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr, return BLK_STS_IOERR; rq_flush_dcache_pages(req); - for (; nsect > 0; nsect--, block++, buf += tr->blksize) - if (tr->writesect(dev, block, buf)) + buf = kmap(bio_page(req->bio)) + bio_offset(req->bio); + for (; nsect > 0; nsect--, block++, buf += tr->blksize) { + if (tr->writesect(dev, block, buf)) { + kunmap(bio_page(req->bio)); return BLK_STS_IOERR; + } + } + kunmap(bio_page(req->bio)); return BLK_STS_OK; default: return BLK_STS_IOERR; @@ -418,7 +427,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) new->rq->queuedata = new; blk_queue_logical_block_size(new->rq, tr->blksize); - blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH); blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 807d17d863b3..64a1fcaafd9a 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1829,18 +1829,6 @@ static int mtd_proc_show(struct seq_file *m, void *v) mutex_unlock(&mtd_table_mutex); return 0; } - -static int mtd_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, mtd_proc_show, NULL); -} - -static const struct file_operations mtd_proc_ops = { - .open = mtd_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ /*====================================================================*/ @@ -1883,7 +1871,7 @@ static int __init init_mtd(void) goto err_bdi; } - proc_mtd = proc_create("mtd", 0, NULL, &mtd_proc_ops); + proc_mtd = proc_create_single("mtd", 0, NULL, mtd_proc_show); ret = init_mtdchar(); if (ret) diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 01059f1a7bca..9f7d83e827c3 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -10,7 +10,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct bonding *bond = seq->private; + struct bonding *bond = PDE_DATA(file_inode(seq->file)); struct list_head *iter; struct slave *slave; loff_t off = 0; @@ -29,7 +29,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct bonding *bond = seq->private; + struct bonding *bond = PDE_DATA(file_inode(seq->file)); struct list_head *iter; struct slave *slave; bool found = false; @@ -56,7 +56,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v) static void bond_info_show_master(struct seq_file *seq) { - struct bonding *bond = seq->private; + struct bonding *bond = PDE_DATA(file_inode(seq->file)); const struct bond_opt_value *optval; struct slave *curr, *primary; int i; @@ -167,7 +167,7 @@ static void bond_info_show_master(struct seq_file *seq) static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { - struct bonding *bond = seq->private; + struct bonding *bond = PDE_DATA(file_inode(seq->file)); seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link)); @@ -257,38 +257,14 @@ static const struct seq_operations bond_info_seq_ops = { .show = bond_info_seq_show, }; -static int bond_info_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int res; - - res = seq_open(file, &bond_info_seq_ops); - if (!res) { - /* recover the pointer buried in proc_dir_entry data */ - seq = file->private_data; - seq->private = PDE_DATA(inode); - } - - return res; -} - -static const struct file_operations bond_info_fops = { - .owner = THIS_MODULE, - .open = bond_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - void bond_create_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir) { - bond->proc_entry = proc_create_data(bond_dev->name, - 0444, bn->proc_dir, - &bond_info_fops, bond); + bond->proc_entry = proc_create_seq_data(bond_dev->name, 0444, + bn->proc_dir, &bond_info_seq_ops, bond); if (bond->proc_entry == NULL) netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n", DRV_NAME, bond_dev->name); diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 78616787f2a3..3da5fca77cbd 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1712,6 +1712,18 @@ static const struct b53_chip_data b53_switch_chips[] = { .duplex_reg = B53_DUPLEX_STAT_FE, }, { + .chip_id = BCM5389_DEVICE_ID, + .dev_name = "BCM5389", + .vlans = 4096, + .enabled_ports = 0x1f, + .arl_entries = 4, + .cpu_port = B53_CPU_PORT, + .vta_regs = B53_VTA_REGS, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + }, + { .chip_id = BCM5395_DEVICE_ID, .dev_name = "BCM5395", .vlans = 4096, @@ -2034,6 +2046,7 @@ int b53_switch_detect(struct b53_device *dev) else dev->chip_id = BCM5365_DEVICE_ID; break; + case BCM5389_DEVICE_ID: case BCM5395_DEVICE_ID: case BCM5397_DEVICE_ID: case BCM5398_DEVICE_ID: diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index fa7556f5d4fb..a533a90e3904 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -285,6 +285,7 @@ static const struct b53_io_ops b53_mdio_ops = { #define B53_BRCM_OUI_1 0x0143bc00 #define B53_BRCM_OUI_2 0x03625c00 #define B53_BRCM_OUI_3 0x00406000 +#define B53_BRCM_OUI_4 0x01410c00 static int b53_mdio_probe(struct mdio_device *mdiodev) { @@ -311,7 +312,8 @@ static int b53_mdio_probe(struct mdio_device *mdiodev) */ if ((phy_id & 0xfffffc00) != B53_BRCM_OUI_1 && (phy_id & 0xfffffc00) != B53_BRCM_OUI_2 && - (phy_id & 0xfffffc00) != B53_BRCM_OUI_3) { + (phy_id & 0xfffffc00) != B53_BRCM_OUI_3 && + (phy_id & 0xfffffc00) != B53_BRCM_OUI_4) { dev_err(&mdiodev->dev, "Unsupported device: 0x%08x\n", phy_id); return -ENODEV; } @@ -360,6 +362,7 @@ static const struct of_device_id b53_of_match[] = { { .compatible = "brcm,bcm53125" }, { .compatible = "brcm,bcm53128" }, { .compatible = "brcm,bcm5365" }, + { .compatible = "brcm,bcm5389" }, { .compatible = "brcm,bcm5395" }, { .compatible = "brcm,bcm5397" }, { .compatible = "brcm,bcm5398" }, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 1187ebd79287..3b57f47d0e79 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -48,6 +48,7 @@ struct b53_io_ops { enum { BCM5325_DEVICE_ID = 0x25, BCM5365_DEVICE_ID = 0x65, + BCM5389_DEVICE_ID = 0x89, BCM5395_DEVICE_ID = 0x95, BCM5397_DEVICE_ID = 0x97, BCM5398_DEVICE_ID = 0x98, diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 23b45da784cb..b89acaee12d4 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, /* Locate the first rule available */ if (fs->location == RX_CLS_LOC_ANY) rule_index = find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); + priv->num_cfp_rules); else rule_index = fs->location; + if (rule_index > bcm_sf2_cfp_rule_size(priv)) + return -ENOSPC; + layout = &udf_tcpip4_layout; /* We only use one UDF slice for now */ slice_num = bcm_sf2_get_slice_number(layout, 0); @@ -562,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, * first half because the HW search is by incrementing addresses. */ if (fs->location == RX_CLS_LOC_ANY) - rule_index[0] = find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); + rule_index[1] = find_first_zero_bit(priv->cfp.used, + priv->num_cfp_rules); else - rule_index[0] = fs->location; + rule_index[1] = fs->location; + if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) + return -ENOSPC; /* Flag it as used (cleared on error path) such that we can immediately * obtain a second one to chain from. */ - set_bit(rule_index[0], priv->cfp.used); + set_bit(rule_index[1], priv->cfp.used); - rule_index[1] = find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); - if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) { + rule_index[0] = find_first_zero_bit(priv->cfp.used, + priv->num_cfp_rules); + if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) { ret = -ENOSPC; goto out_err; } @@ -712,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Flag the second half rule as being used now, return it as the * location, and flag it as unique while dumping rules */ - set_bit(rule_index[1], priv->cfp.used); + set_bit(rule_index[0], priv->cfp.used); set_bit(rule_index[1], priv->cfp.unique); fs->location = rule_index[1]; return ret; out_err: - clear_bit(rule_index[0], priv->cfp.used); + clear_bit(rule_index[1], priv->cfp.used); return ret; } @@ -785,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(struct bcm_sf2_priv *priv, int port, int ret; u32 reg; - /* Refuse deletion of unused rules, and the default reserved rule */ - if (!test_bit(loc, priv->cfp.used) || loc == 0) - return -EINVAL; - /* Indicate which rule we want to read */ bcm_sf2_cfp_rule_addr_set(priv, loc); @@ -826,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 next_loc = 0; int ret; + /* Refuse deleting unused rules, and those that are not unique since + * that could leave IPv6 rules with one of the chained rule in the + * table. + */ + if (!test_bit(loc, priv->cfp.unique) || loc == 0) + return -EINVAL; + ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc); if (ret) return ret; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 36c8950dbd2d..176861bd2252 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1212,9 +1212,9 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, vp->mii.reg_num_mask = 0x1f; /* Makes sure rings are at least 16 byte aligned. */ - vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + sizeof(struct boom_tx_desc) * TX_RING_SIZE, - &vp->rx_ring_dma); + &vp->rx_ring_dma, GFP_KERNEL); retval = -ENOMEM; if (!vp->rx_ring) goto free_device; @@ -1476,11 +1476,10 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, return 0; free_ring: - pci_free_consistent(pdev, - sizeof(struct boom_rx_desc) * RX_RING_SIZE - + sizeof(struct boom_tx_desc) * TX_RING_SIZE, - vp->rx_ring, - vp->rx_ring_dma); + dma_free_coherent(&pdev->dev, + sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + vp->rx_ring, vp->rx_ring_dma); free_device: free_netdev(dev); pr_err(PFX "vortex_probe1 fails. Returns %d\n", retval); @@ -1751,9 +1750,9 @@ vortex_open(struct net_device *dev) break; /* Bad news! */ skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ - dma = pci_map_single(VORTEX_PCI(vp), skb->data, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) + dma = dma_map_single(vp->gendev, skb->data, + PKT_BUF_SZ, DMA_FROM_DEVICE); + if (dma_mapping_error(vp->gendev, dma)) break; vp->rx_ring[i].addr = cpu_to_le32(dma); } @@ -2067,9 +2066,9 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vp->bus_master) { /* Set the bus-master controller to transfer the packet. */ int len = (skb->len + 3) & ~3; - vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, - PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { + vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) { dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; @@ -2168,9 +2167,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum); if (!skb_shinfo(skb)->nr_frags) { - dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, - PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) + dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, dma_addr)) goto out_dma_err; vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr); @@ -2178,9 +2177,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) } else { int i; - dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, - skb_headlen(skb), PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) + dma_addr = dma_map_single(vp->gendev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, dma_addr)) goto out_dma_err; vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr); @@ -2189,21 +2188,21 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag, + dma_addr = skb_frag_dma_map(vp->gendev, frag, 0, frag->size, DMA_TO_DEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) { + if (dma_mapping_error(vp->gendev, dma_addr)) { for(i = i-1; i >= 0; i--) - dma_unmap_page(&VORTEX_PCI(vp)->dev, + dma_unmap_page(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr), le32_to_cpu(vp->tx_ring[entry].frag[i+1].length), DMA_TO_DEVICE); - pci_unmap_single(VORTEX_PCI(vp), + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[0].addr), le32_to_cpu(vp->tx_ring[entry].frag[0].length), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); goto out_dma_err; } @@ -2218,8 +2217,8 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) } } #else - dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) + dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, dma_addr)) goto out_dma_err; vp->tx_ring[entry].addr = cpu_to_le32(dma_addr); vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG); @@ -2254,7 +2253,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) out: return NETDEV_TX_OK; out_dma_err: - dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n"); + dev_err(vp->gendev, "Error mapping dma buffer\n"); goto out; } @@ -2322,7 +2321,7 @@ vortex_interrupt(int irq, void *dev_id) if (status & DMADone) { if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) { iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ - pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE); + dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE); pkts_compl++; bytes_compl += vp->tx_skb->len; dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */ @@ -2459,19 +2458,19 @@ boomerang_interrupt(int irq, void *dev_id) struct sk_buff *skb = vp->tx_skbuff[entry]; #if DO_ZEROCOPY int i; - pci_unmap_single(VORTEX_PCI(vp), + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[0].addr), le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); for (i=1; i<=skb_shinfo(skb)->nr_frags; i++) - pci_unmap_page(VORTEX_PCI(vp), + dma_unmap_page(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[i].addr), le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); #else - pci_unmap_single(VORTEX_PCI(vp), - le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(vp->gendev, + le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE); #endif pkts_compl++; bytes_compl += skb->len; @@ -2561,14 +2560,14 @@ static int vortex_rx(struct net_device *dev) /* 'skb_put()' points to the start of sk_buff data area. */ if (vp->bus_master && ! (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) { - dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len), - pkt_len, PCI_DMA_FROMDEVICE); + dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len), + pkt_len, DMA_FROM_DEVICE); iowrite32(dma, ioaddr + Wn7_MasterAddr); iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); iowrite16(StartDMAUp, ioaddr + EL3_CMD); while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000) ; - pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE); + dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE); } else { ioread32_rep(ioaddr + RX_FIFO, skb_put(skb, pkt_len), @@ -2635,11 +2634,11 @@ boomerang_rx(struct net_device *dev) if (pkt_len < rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); /* 'skb_put()' points to the start of sk_buff data area. */ skb_put_data(skb, vp->rx_skbuff[entry]->data, pkt_len); - pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); vp->rx_copy++; } else { /* Pre-allocate the replacement skb. If it or its @@ -2651,9 +2650,9 @@ boomerang_rx(struct net_device *dev) dev->stats.rx_dropped++; goto clear_complete; } - newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { + newdma = dma_map_single(vp->gendev, newskb->data, + PKT_BUF_SZ, DMA_FROM_DEVICE); + if (dma_mapping_error(vp->gendev, newdma)) { dev->stats.rx_dropped++; consume_skb(newskb); goto clear_complete; @@ -2664,7 +2663,7 @@ boomerang_rx(struct net_device *dev) vp->rx_skbuff[entry] = newskb; vp->rx_ring[entry].addr = cpu_to_le32(newdma); skb_put(skb, pkt_len); - pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); vp->rx_nocopy++; } skb->protocol = eth_type_trans(skb, dev); @@ -2761,8 +2760,8 @@ vortex_close(struct net_device *dev) if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */ for (i = 0; i < RX_RING_SIZE; i++) if (vp->rx_skbuff[i]) { - pci_unmap_single( VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr), - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr), + PKT_BUF_SZ, DMA_FROM_DEVICE); dev_kfree_skb(vp->rx_skbuff[i]); vp->rx_skbuff[i] = NULL; } @@ -2775,12 +2774,12 @@ vortex_close(struct net_device *dev) int k; for (k=0; k<=skb_shinfo(skb)->nr_frags; k++) - pci_unmap_single(VORTEX_PCI(vp), + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].frag[k].addr), le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); #else - pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE); #endif dev_kfree_skb(skb); vp->tx_skbuff[i] = NULL; @@ -3288,11 +3287,10 @@ static void vortex_remove_one(struct pci_dev *pdev) pci_iounmap(pdev, vp->ioaddr); - pci_free_consistent(pdev, - sizeof(struct boom_rx_desc) * RX_RING_SIZE - + sizeof(struct boom_tx_desc) * TX_RING_SIZE, - vp->rx_ring, - vp->rx_ring_dma); + dma_free_coherent(&pdev->dev, + sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + vp->rx_ring, vp->rx_ring_dma); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index ac99d089ac72..1c97e39b478e 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -164,7 +164,9 @@ bad_clone_list[] __initdata = { #define NESM_START_PG 0x40 /* First page of TX buffer */ #define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ -#if defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */ +#if defined(CONFIG_MACH_TX49XX) +# define DCR_VAL 0x48 /* 8-bit mode */ +#elif defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */ # define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49) #else # define DCR_VAL 0x49 diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index a561705f232c..be198cc0b10c 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1552,22 +1552,26 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) if (!ioaddr) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("card has no PCI IO resources, aborting\n"); - return -ENODEV; + err = -ENODEV; + goto err_disable_dev; } err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK); if (err) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("architecture does not support 32bit PCI busmaster DMA\n"); - return err; + goto err_disable_dev; } if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("io address range already allocated\n"); - return -EBUSY; + err = -EBUSY; + goto err_disable_dev; } err = pcnet32_probe1(ioaddr, 1, pdev); + +err_disable_dev: if (err < 0) pci_disable_device(pdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index b57acb8dc35b..dc25066c59a1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -419,15 +419,15 @@ static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ - {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */ - {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */ - {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */ - {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */ - {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */ - {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */ - {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */ - {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */ - {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */ + {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */ + {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */ + {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */ + {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */ + {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */ + {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */ + {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */ + {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */ + {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */ }; static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { @@ -444,16 +444,6 @@ static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ - {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */ - {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */ - {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */ - {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */ - {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */ - {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */ - {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */ - {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */ - {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */ - {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */ }; static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index db92f1858060..b76447baccaf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -836,7 +836,7 @@ bool is_filter_exact_match(struct adapter *adap, { struct tp_params *tp = &adap->params.tp; u64 hash_filter_mask = tp->hash_filter_mask; - u32 mask; + u64 ntuple_mask = 0; if (!is_hashfilter(adap)) return false; @@ -865,73 +865,45 @@ bool is_filter_exact_match(struct adapter *adap, if (!fs->val.fport || fs->mask.fport != 0xffff) return false; - if (tp->fcoe_shift >= 0) { - mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W; - if (mask && !fs->mask.fcoe) - return false; - } + /* calculate tuple mask and compare with mask configured in hw */ + if (tp->fcoe_shift >= 0) + ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift; - if (tp->port_shift >= 0) { - mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W; - if (mask && !fs->mask.iport) - return false; - } + if (tp->port_shift >= 0) + ntuple_mask |= (u64)fs->mask.iport << tp->port_shift; if (tp->vnic_shift >= 0) { - mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W; - - if ((adap->params.tp.ingress_config & VNIC_F)) { - if (mask && !fs->mask.pfvf_vld) - return false; - } else { - if (mask && !fs->mask.ovlan_vld) - return false; - } + if ((adap->params.tp.ingress_config & VNIC_F)) + ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift; + else + ntuple_mask |= (u64)fs->mask.ovlan_vld << + tp->vnic_shift; } - if (tp->vlan_shift >= 0) { - mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W; - if (mask && !fs->mask.ivlan) - return false; - } + if (tp->vlan_shift >= 0) + ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift; - if (tp->tos_shift >= 0) { - mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W; - if (mask && !fs->mask.tos) - return false; - } + if (tp->tos_shift >= 0) + ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift; - if (tp->protocol_shift >= 0) { - mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W; - if (mask && !fs->mask.proto) - return false; - } + if (tp->protocol_shift >= 0) + ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift; - if (tp->ethertype_shift >= 0) { - mask = (hash_filter_mask >> tp->ethertype_shift) & - FT_ETHERTYPE_W; - if (mask && !fs->mask.ethtype) - return false; - } + if (tp->ethertype_shift >= 0) + ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift; - if (tp->macmatch_shift >= 0) { - mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W; - if (mask && !fs->mask.macidx) - return false; - } + if (tp->macmatch_shift >= 0) + ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift; + + if (tp->matchtype_shift >= 0) + ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift; + + if (tp->frag_shift >= 0) + ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift; + + if (ntuple_mask != hash_filter_mask) + return false; - if (tp->matchtype_shift >= 0) { - mask = (hash_filter_mask >> tp->matchtype_shift) & - FT_MPSHITTYPE_W; - if (mask && !fs->mask.matchtype) - return false; - } - if (tp->frag_shift >= 0) { - mask = (hash_filter_mask >> tp->frag_shift) & - FT_FRAGMENTATION_W; - if (mask && !fs->mask.frag) - return false; - } return true; } diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 81684acf52af..8a8b12b720ef 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2747,11 +2747,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); /* Query PCI controller on system for DMA addressing - * limitation for the device. Try 64-bit first, and + * limitation for the device. Try 47-bit first, and * fail to 32-bit. */ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47)); if (err) { err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { @@ -2765,10 +2765,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_release_regions; } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47)); if (err) { dev_err(dev, "Unable to obtain %u-bit DMA " - "for consistent allocations, aborting\n", 64); + "for consistent allocations, aborting\n", 47); goto err_out_release_regions; } using_dac = 1; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c697e79e491e..8f755009ff38 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3309,7 +3309,9 @@ void be_detect_error(struct be_adapter *adapter) if ((val & POST_STAGE_FAT_LOG_START) != POST_STAGE_FAT_LOG_START && (val & POST_STAGE_ARMFW_UE) - != POST_STAGE_ARMFW_UE) + != POST_STAGE_ARMFW_UE && + (val & POST_STAGE_RECOVERABLE_ERR) + != POST_STAGE_RECOVERABLE_ERR) return; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d4604bc8eb5b..9d3eed46830d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index f81439796ac7..43d973215040 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Fast Ethernet Controller (ENET) PTP driver for MX6x. * * Copyright (C) 2012 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 6e8d6a6f6aaf..5ec1185808e5 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -192,6 +192,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, if (adapter->fw_done_rc) { dev_err(dev, "Couldn't map long term buffer,rc = %d\n", adapter->fw_done_rc); + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); return -1; } return 0; @@ -795,9 +796,11 @@ static int ibmvnic_login(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long timeout = msecs_to_jiffies(30000); int retry_count = 0; + bool retry; int rc; do { + retry = false; if (retry_count > IBMVNIC_MAX_QUEUES) { netdev_warn(netdev, "Login attempts exceeded\n"); return -1; @@ -821,6 +824,9 @@ static int ibmvnic_login(struct net_device *netdev) retry_count++; release_sub_crqs(adapter, 1); + retry = true; + netdev_dbg(netdev, + "Received partial success, retrying...\n"); adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); send_cap_queries(adapter); @@ -848,7 +854,7 @@ static int ibmvnic_login(struct net_device *netdev) netdev_warn(netdev, "Adapter login failed\n"); return -1; } - } while (adapter->init_done_rc == PARTIALSUCCESS); + } while (retry); /* handle pending MAC address changes after successful login */ if (adapter->mac_change_pending) { @@ -1821,9 +1827,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (rc) return rc; } + ibmvnic_disable_irqs(adapter); } - - ibmvnic_disable_irqs(adapter); adapter->state = VNIC_CLOSED; if (reset_state == VNIC_CLOSED) @@ -2617,18 +2622,21 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, { struct device *dev = &adapter->vdev->dev; unsigned long rc; - u64 val; if (scrq->hw_irq > 0x100000000ULL) { dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq); return 1; } - val = (0xff000000) | scrq->hw_irq; - rc = plpar_hcall_norets(H_EOI, val); - if (rc) - dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", - val, rc); + if (adapter->resetting && + adapter->reset_reason == VNIC_RESET_MOBILITY) { + u64 val = (0xff000000) | scrq->hw_irq; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", + val, rc); + } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); @@ -4586,14 +4594,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) release_crq_queue(adapter); } - rc = init_stats_buffers(adapter); - if (rc) - return rc; - - rc = init_stats_token(adapter); - if (rc) - return rc; - return rc; } @@ -4662,13 +4662,21 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_init_fail; } while (rc == EAGAIN); + rc = init_stats_buffers(adapter); + if (rc) + goto ibmvnic_init_fail; + + rc = init_stats_token(adapter); + if (rc) + goto ibmvnic_stats_fail; + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev->min_mtu = adapter->min_mtu - ETH_HLEN; netdev->max_mtu = adapter->max_mtu - ETH_HLEN; rc = device_create_file(&dev->dev, &dev_attr_failover); if (rc) - goto ibmvnic_init_fail; + goto ibmvnic_dev_file_err; netif_carrier_off(netdev); rc = register_netdev(netdev); @@ -4687,6 +4695,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) ibmvnic_register_fail: device_remove_file(&dev->dev, &dev_attr_failover); +ibmvnic_dev_file_err: + release_stats_token(adapter); + +ibmvnic_stats_fail: + release_stats_buffers(adapter); + ibmvnic_init_fail: release_sub_crqs(adapter, 1); release_crq_queue(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index afadba99f7b8..2ecd55856c50 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9054,7 +9054,6 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, { const struct tc_action *a; LIST_HEAD(actions); - int err; if (!tcf_exts_has_actions(exts)) return -EINVAL; @@ -9075,11 +9074,11 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, if (!dev) return -EINVAL; - err = handle_redirect_action(adapter, dev->ifindex, queue, - action); - if (err == 0) - return err; + return handle_redirect_action(adapter, dev->ifindex, + queue, action); } + + return -EINVAL; } return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index a822f7a56bc5..685337d58276 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -43,12 +43,12 @@ #include "fw.h" /* - * We allocate in as big chunks as we can, up to a maximum of 256 KB - * per chunk. + * We allocate in page size (default 4KB on many archs) chunks to avoid high + * order memory allocations in fragmented/high usage memory situation. */ enum { - MLX4_ICM_ALLOC_SIZE = 1 << 18, - MLX4_TABLE_CHUNK_SIZE = 1 << 18 + MLX4_ICM_ALLOC_SIZE = PAGE_SIZE, + MLX4_TABLE_CHUNK_SIZE = PAGE_SIZE, }; static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) @@ -398,9 +398,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, u64 size; obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; + if (WARN_ON(!obj_per_chunk)) + return -EINVAL; num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; - table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); + table->icm = kvzalloc(num_icm * sizeof(*table->icm), GFP_KERNEL); if (!table->icm) return -ENOMEM; table->virt = virt; @@ -446,7 +448,7 @@ err: mlx4_free_icm(dev, table->icm[i], use_coherent); } - kfree(table->icm); + kvfree(table->icm); return -ENOMEM; } @@ -462,5 +464,5 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table) mlx4_free_icm(dev, table->icm[i], table->coherent); } - kfree(table->icm); + kvfree(table->icm); } diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 2edcce98ab2d..65482f004e50 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -172,7 +172,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) list_add_tail(&dev_ctx->list, &priv->ctx_list); spin_unlock_irqrestore(&priv->ctx_lock, flags); - mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n", + mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n", dev_ctx->intf->protocol, enable ? "enabled" : "disabled"); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 211578ffc70d..60172a38c4a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2929,6 +2929,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create file for port %d\n", port); devlink_port_unregister(&info->devlink_port); info->port = -1; + return err; } sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); @@ -2950,9 +2951,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) &info->port_attr); devlink_port_unregister(&info->devlink_port); info->port = -1; + return err; } - return err; + return 0; } static void mlx4_cleanup_port_info(struct mlx4_port_info *info) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 3aaf4bad6c5a..427e7a31862c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -393,11 +393,11 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; struct mlx4_qp *qp; - spin_lock(&qp_table->lock); + spin_lock_irq(&qp_table->lock); qp = __mlx4_qp_lookup(dev, qpn); - spin_unlock(&qp_table->lock); + spin_unlock_irq(&qp_table->lock); return qp; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 176645762e49..1ff0b0e93804 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -615,6 +615,45 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } +static __be32 mlx5e_get_fcs(struct sk_buff *skb) +{ + int last_frag_sz, bytes_in_prev, nr_frags; + u8 *fcs_p1, *fcs_p2; + skb_frag_t *last_frag; + __be32 fcs_bytes; + + if (!skb_is_nonlinear(skb)) + return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); + + nr_frags = skb_shinfo(skb)->nr_frags; + last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; + last_frag_sz = skb_frag_size(last_frag); + + /* If all FCS data is in last frag */ + if (last_frag_sz >= ETH_FCS_LEN) + return *(__be32 *)(skb_frag_address(last_frag) + + last_frag_sz - ETH_FCS_LEN); + + fcs_p2 = (u8 *)skb_frag_address(last_frag); + bytes_in_prev = ETH_FCS_LEN - last_frag_sz; + + /* Find where the other part of the FCS is - Linear or another frag */ + if (nr_frags == 1) { + fcs_p1 = skb_tail_pointer(skb); + } else { + skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; + + fcs_p1 = skb_frag_address(prev_frag) + + skb_frag_size(prev_frag); + } + fcs_p1 -= bytes_in_prev; + + memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); + memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); + + return fcs_bytes; +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -643,6 +682,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); + if (unlikely(netdev->features & NETIF_F_RXFCS)) + skb->csum = csum_add(skb->csum, + (__force __wsum)mlx5e_get_fcs(skb)); rq->stats.csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 0f5da499a223..fad8c2e3804e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -237,19 +237,17 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, context->buf.sg[0].data = &context->command; spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - list_add_tail(&context->list, &fdev->ipsec->pending_cmds); + res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); + if (!res) + list_add_tail(&context->list, &fdev->ipsec->pending_cmds); spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); if (res) { - mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n", - res); - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - list_del(&context->list); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res); kfree(context); return ERR_PTR(res); } + /* Context will be freed by wait func after completion */ return context; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ca38a30fbe91..adc6ab2cf429 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4433,6 +4433,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port"); return -EINVAL; } + if (is_vlan_dev(upper_dev) && + vlan_dev_vlan_id(upper_dev) == 1) { + NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic"); + return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 7ed08486ae23..c805dcbebd02 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -84,7 +84,7 @@ static int sonic_open(struct net_device *dev) for (i = 0; i < SONIC_NUM_RRS; i++) { dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE), SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!laddr) { + if (dma_mapping_error(lp->device, laddr)) { while(i > 0) { /* free any that were mapped successfully */ i--; dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 1dc424685f4e..35fb31f682af 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -335,7 +335,7 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem); start = mem; - while (mem - start + 8 < nfp_cpp_area_size(area)) { + while (mem - start + 8 <= nfp_cpp_area_size(area)) { u8 __iomem *value; u32 type, length; diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 00f41c145d4d..820b226d6ff8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -77,7 +77,7 @@ #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET /* ILT entry structure */ -#define ILT_ENTRY_PHY_ADDR_MASK 0x000FFFFFFFFFFFULL +#define ILT_ENTRY_PHY_ADDR_MASK (~0ULL >> 12) #define ILT_ENTRY_PHY_ADDR_SHIFT 0 #define ILT_ENTRY_VALID_MASK 0x1ULL #define ILT_ENTRY_VALID_SHIFT 52 diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 38502815d681..468c59d2e491 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -292,6 +292,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) struct qed_ll2_tx_packet *p_pkt = NULL; struct qed_ll2_info *p_ll2_conn; struct qed_ll2_tx_queue *p_tx; + unsigned long flags = 0; dma_addr_t tx_frag; p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); @@ -300,6 +301,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) p_tx = &p_ll2_conn->tx_queue; + spin_lock_irqsave(&p_tx->lock, flags); while (!list_empty(&p_tx->active_descq)) { p_pkt = list_first_entry(&p_tx->active_descq, struct qed_ll2_tx_packet, list_entry); @@ -309,6 +311,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) list_del(&p_pkt->list_entry); b_last_packet = list_empty(&p_tx->active_descq); list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + spin_unlock_irqrestore(&p_tx->lock, flags); if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) { struct qed_ooo_buffer *p_buffer; @@ -328,7 +331,9 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) b_last_frag, b_last_packet); } + spin_lock_irqsave(&p_tx->lock, flags); } + spin_unlock_irqrestore(&p_tx->lock, flags); } static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) @@ -556,6 +561,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) struct qed_ll2_info *p_ll2_conn = NULL; struct qed_ll2_rx_packet *p_pkt = NULL; struct qed_ll2_rx_queue *p_rx; + unsigned long flags = 0; p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); if (!p_ll2_conn) @@ -563,13 +569,14 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) p_rx = &p_ll2_conn->rx_queue; + spin_lock_irqsave(&p_rx->lock, flags); while (!list_empty(&p_rx->active_descq)) { p_pkt = list_first_entry(&p_rx->active_descq, struct qed_ll2_rx_packet, list_entry); if (!p_pkt) break; - list_move_tail(&p_pkt->list_entry, &p_rx->free_descq); + spin_unlock_irqrestore(&p_rx->lock, flags); if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) { struct qed_ooo_buffer *p_buffer; @@ -588,7 +595,30 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) cookie, rx_buf_addr, b_last); } + spin_lock_irqsave(&p_rx->lock, flags); } + spin_unlock_irqrestore(&p_rx->lock, flags); +} + +static bool +qed_ll2_lb_rxq_handler_slowpath(struct qed_hwfn *p_hwfn, + struct core_rx_slow_path_cqe *p_cqe) +{ + struct ooo_opaque *iscsi_ooo; + u32 cid; + + if (p_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) + return false; + + iscsi_ooo = (struct ooo_opaque *)&p_cqe->opaque_data; + if (iscsi_ooo->ooo_opcode != TCP_EVENT_DELETE_ISLES) + return false; + + /* Need to make a flush */ + cid = le32_to_cpu(iscsi_ooo->cid); + qed_ooo_release_connection_isles(p_hwfn, p_hwfn->p_ooo_info, cid); + + return true; } static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, @@ -617,6 +647,11 @@ static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn, cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); cqe_type = cqe->rx_cqe_sp.type; + if (cqe_type == CORE_RX_CQE_TYPE_SLOW_PATH) + if (qed_ll2_lb_rxq_handler_slowpath(p_hwfn, + &cqe->rx_cqe_sp)) + continue; + if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) { DP_NOTICE(p_hwfn, "Got a non-regular LB LL2 completion [type 0x%02x]\n", @@ -794,6 +829,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; int rc; + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) + return 0; + rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn); if (rc) return rc; @@ -814,6 +852,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) + return 0; + new_idx = le16_to_cpu(*p_tx->p_fw_cons); num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); @@ -1867,17 +1908,25 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle) /* Stop Tx & Rx of connection, if needed */ if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->tx_queue.b_cb_registred = false; + smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */ rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); if (rc) goto out; + qed_ll2_txq_flush(p_hwfn, connection_handle); + qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); } if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->rx_queue.b_cb_registred = false; + smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */ rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); if (rc) goto out; + qed_ll2_rxq_flush(p_hwfn, connection_handle); + qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); } if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) @@ -1925,16 +1974,6 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle) if (!p_ll2_conn) return; - if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { - p_ll2_conn->rx_queue.b_cb_registred = false; - qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); - } - - if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { - p_ll2_conn->tx_queue.b_cb_registred = false; - qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); - } - kfree(p_ll2_conn->tx_queue.descq_mem); qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index a01e7d6e5442..f6655e251bbd 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1066,13 +1066,12 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); + qede_rdma_dev_remove(edev); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); qede_ptp_disable(edev); - qede_rdma_dev_remove(edev); - edev->ops->common->set_power_state(cdev, PCI_D0); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index a5b792ce2ae7..1bf930d4a1e5 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -163,7 +163,7 @@ enum { }; /* Driver's parameters */ -#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE) +#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_RENESAS) #define SH_ETH_RX_ALIGN 32 #else #define SH_ETH_RX_ALIGN 2 diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a4ebd8715494..661828e8fdcf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1289,9 +1289,8 @@ static int efx_init_io(struct efx_nic *efx) pci_set_master(pci_dev); - /* Set the PCI DMA mask. Try all possibilities from our - * genuine mask down to 32 bits, because some architectures - * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + /* Set the PCI DMA mask. Try all possibilities from our genuine mask + * down to 32 bits, because some architectures will allow 40 bit * masks event though they reject 46 bit masks. */ while (dma_mask > 0x7fffffffUL) { diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 3d6c91e96589..dd5530a4f8c8 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -1242,9 +1242,8 @@ static int ef4_init_io(struct ef4_nic *efx) pci_set_master(pci_dev); - /* Set the PCI DMA mask. Try all possibilities from our - * genuine mask down to 32 bits, because some architectures - * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + /* Set the PCI DMA mask. Try all possibilities from our genuine mask + * down to 32 bits, because some architectures will allow 40 bit * masks event though they reject 46 bit masks. */ while (dma_mask > 0x7fffffffUL) { diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index f4c0b02ddad8..59fbf74dcada 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1674,8 +1674,8 @@ static int netsec_probe(struct platform_device *pdev) if (ret) goto unreg_napi; - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) - dev_warn(&pdev->dev, "Failed to enable 64-bit DMA\n"); + if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40))) + dev_warn(&pdev->dev, "Failed to set DMA mask\n"); ret = register_netdev(ndev); if (ret) { diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index abceea802ea1..38828ab77eb9 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1873,7 +1873,7 @@ static int davinci_emac_probe(struct platform_device *pdev) if (IS_ERR(priv->txchan)) { dev_err(&pdev->dev, "error initializing tx dma channel\n"); rc = PTR_ERR(priv->txchan); - goto no_cpdma_chan; + goto err_free_dma; } priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH, @@ -1881,14 +1881,14 @@ static int davinci_emac_probe(struct platform_device *pdev) if (IS_ERR(priv->rxchan)) { dev_err(&pdev->dev, "error initializing rx dma channel\n"); rc = PTR_ERR(priv->rxchan); - goto no_cpdma_chan; + goto err_free_txchan; } res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) { dev_err(&pdev->dev, "error getting irq res\n"); rc = -ENOENT; - goto no_cpdma_chan; + goto err_free_rxchan; } ndev->irq = res->start; @@ -1914,7 +1914,7 @@ static int davinci_emac_probe(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); dev_err(&pdev->dev, "%s: failed to get_sync(%d)\n", __func__, rc); - goto no_cpdma_chan; + goto err_napi_del; } /* register the network device */ @@ -1924,7 +1924,7 @@ static int davinci_emac_probe(struct platform_device *pdev) dev_err(&pdev->dev, "error in register_netdev\n"); rc = -ENODEV; pm_runtime_put(&pdev->dev); - goto no_cpdma_chan; + goto err_napi_del; } @@ -1937,11 +1937,13 @@ static int davinci_emac_probe(struct platform_device *pdev) return 0; -no_cpdma_chan: - if (priv->txchan) - cpdma_chan_destroy(priv->txchan); - if (priv->rxchan) - cpdma_chan_destroy(priv->rxchan); +err_napi_del: + netif_napi_del(&priv->napi); +err_free_rxchan: + cpdma_chan_destroy(priv->rxchan); +err_free_txchan: + cpdma_chan_destroy(priv->txchan); +err_free_dma: cpdma_ctlr_destroy(priv->dma); no_pdata: if (of_phy_is_fixed_link(np)) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index dfabbae72efd..f347fd9c5b28 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -449,20 +449,6 @@ static const struct seq_operations bpq_seqops = { .show = bpq_seq_show, }; -static int bpq_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &bpq_seqops); -} - -static const struct file_operations bpq_info_fops = { - .owner = THIS_MODULE, - .open = bpq_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - - /* ------------------------------------------------------------------------ */ static const struct net_device_ops bpq_netdev_ops = { @@ -590,7 +576,7 @@ static int bpq_device_event(struct notifier_block *this, static int __init bpq_init_driver(void) { #ifdef CONFIG_PROC_FS - if (!proc_create("bpqether", 0444, init_net.proc_net, &bpq_info_fops)) { + if (!proc_create_seq("bpqether", 0444, init_net.proc_net, &bpq_seqops)) { printk(KERN_ERR "bpq: cannot create /proc/net/bpqether entry.\n"); return -ENOENT; diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 3de272959090..6c03932d8a6b 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -2084,21 +2084,6 @@ static const struct seq_operations scc_net_seq_ops = { .stop = scc_net_seq_stop, .show = scc_net_seq_show, }; - - -static int scc_net_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &scc_net_seq_ops); -} - -static const struct file_operations scc_net_seq_fops = { - .owner = THIS_MODULE, - .open = scc_net_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - #endif /* CONFIG_PROC_FS */ @@ -2122,7 +2107,7 @@ static int __init scc_init_driver (void) } rtnl_unlock(); - proc_create("z8530drv", 0, init_net.proc_net, &scc_net_seq_fops); + proc_create_seq("z8530drv", 0, init_net.proc_net, &scc_net_seq_ops); return 0; } diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 83034eb7ed4f..16ec7af6ab7b 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -841,20 +841,6 @@ static const struct seq_operations yam_seqops = { .stop = yam_seq_stop, .show = yam_seq_show, }; - -static int yam_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &yam_seqops); -} - -static const struct file_operations yam_info_fops = { - .owner = THIS_MODULE, - .open = yam_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif @@ -1168,7 +1154,7 @@ static int __init yam_init_driver(void) yam_timer.expires = jiffies + HZ / 100; add_timer(&yam_timer); - proc_create("yam", 0444, init_net.proc_net, &yam_info_fops); + proc_create_seq("yam", 0444, init_net.proc_net, &yam_seqops); return 0; error: while (--i >= 0) { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 450eec264a5e..4377c26f714d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -792,8 +792,10 @@ static int ipvlan_device_event(struct notifier_block *unused, break; case NETDEV_CHANGEADDR: - list_for_each_entry(ipvlan, &port->ipvlans, pnode) + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev); + } break; case NETDEV_PRE_TYPE_CHANGE: diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c index 6838129839ca..e757b09f1889 100644 --- a/drivers/net/phy/bcm-cygnus.c +++ b/drivers/net/phy/bcm-cygnus.c @@ -61,17 +61,17 @@ static int bcm_cygnus_afe_config(struct phy_device *phydev) return rc; /* make rcal=100, since rdb default is 000 */ - rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10); + rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB1, 0x10); if (rc < 0) return rc; /* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */ - rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10); + rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x10); if (rc < 0) return rc; /* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */ - rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00); + rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x00); return 0; } diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 5ad130c3da43..d5e0833d69b9 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -56,7 +56,7 @@ int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum) /* The register must be written to both the Shadow Register Select and * the Shadow Read Register Selector */ - phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | + phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MASK | regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT); return phy_read(phydev, MII_BCM54XX_AUX_CTL); } diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index 7c73808cbbde..81cceaa412fe 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -14,11 +14,18 @@ #ifndef _LINUX_BCM_PHY_LIB_H #define _LINUX_BCM_PHY_LIB_H +#include <linux/brcmphy.h> #include <linux/phy.h> int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val); int bcm_phy_read_exp(struct phy_device *phydev, u16 reg); +static inline int bcm_phy_write_exp_sel(struct phy_device *phydev, + u16 reg, u16 val) +{ + return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val); +} + int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val); int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum); diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 29b1c88b55cc..01d2ff2f6241 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -65,10 +65,10 @@ struct bcm7xxx_phy_priv { static void r_rc_cal_reset(struct phy_device *phydev) { /* Reset R_CAL/RC_CAL Engine */ - bcm_phy_write_exp(phydev, 0x00b0, 0x0010); + bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010); /* Disable Reset R_AL/RC_CAL Engine */ - bcm_phy_write_exp(phydev, 0x00b0, 0x0000); + bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000); } static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index f41b224a9cdb..ab195f0916d6 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -573,9 +573,40 @@ static int ksz9031_config_init(struct phy_device *phydev) ksz9031_of_load_skew_values(phydev, of_node, MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4, tx_data_skews, 4); + + /* Silicon Errata Sheet (DS80000691D or DS80000692D): + * When the device links in the 1000BASE-T slave mode only, + * the optional 125MHz reference output clock (CLK125_NDO) + * has wide duty cycle variation. + * + * The optional CLK125_NDO clock does not meet the RGMII + * 45/55 percent (min/max) duty cycle requirement and therefore + * cannot be used directly by the MAC side for clocking + * applications that have setup/hold time requirements on + * rising and falling clock edges. + * + * Workaround: + * Force the phy to be the master to receive a stable clock + * which meets the duty cycle requirement. + */ + if (of_property_read_bool(of_node, "micrel,force-master")) { + result = phy_read(phydev, MII_CTRL1000); + if (result < 0) + goto err_force_master; + + /* enable master mode, config & prefer master */ + result |= CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER; + result = phy_write(phydev, MII_CTRL1000, result); + if (result < 0) + goto err_force_master; + } } return ksz9031_center_flp_timing(phydev); + +err_force_master: + phydev_err(phydev, "failed to force the phy to master mode\n"); + return result; } #define KSZ8873MLL_GLOBAL_CONTROL_4 0x06 diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index dc7c7ec43202..02ad03a2fab7 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -605,30 +605,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (cmd == PPPIOCDETACH) { /* - * We have to be careful here... if the file descriptor - * has been dup'd, we could have another process in the - * middle of a poll using the same file *, so we had - * better not free the interface data structures - - * instead we fail the ioctl. Even in this case, we - * shut down the interface if we are the owner of it. - * Actually, we should get rid of PPPIOCDETACH, userland - * (i.e. pppd) could achieve the same effect by closing - * this fd and reopening /dev/ppp. + * PPPIOCDETACH is no longer supported as it was heavily broken, + * and is only known to have been used by pppd older than + * ppp-2.4.2 (released November 2003). */ + pr_warn_once("%s (%d) used obsolete PPPIOCDETACH ioctl\n", + current->comm, current->pid); err = -EINVAL; - if (pf->kind == INTERFACE) { - ppp = PF_TO_PPP(pf); - rtnl_lock(); - if (file == ppp->owner) - unregister_netdevice(ppp->dev); - rtnl_unlock(); - } - if (atomic_long_read(&file->f_count) < 2) { - ppp_release(NULL, file); - err = 0; - } else - pr_warn("PPPIOCDETACH file->f_count=%ld\n", - atomic_long_read(&file->f_count)); goto out; } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 7df07337d69c..ce61231e96ea 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1096,21 +1096,6 @@ static const struct seq_operations pppoe_seq_ops = { .stop = pppoe_seq_stop, .show = pppoe_seq_show, }; - -static int pppoe_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &pppoe_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations pppoe_seq_fops = { - .owner = THIS_MODULE, - .open = pppoe_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif /* CONFIG_PROC_FS */ static const struct proto_ops pppoe_ops = { @@ -1146,7 +1131,8 @@ static __net_init int pppoe_init_net(struct net *net) rwlock_init(&pn->hash_lock); - pde = proc_create("pppoe", 0444, net->proc_net, &pppoe_seq_fops); + pde = proc_create_net("pppoe", 0444, net->proc_net, + &pppoe_seq_ops, sizeof(struct seq_net_private)); #ifdef CONFIG_PROC_FS if (!pde) return -ENOMEM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ef33950a45d9..23e9eb66197f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -681,15 +681,6 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } -static void tun_cleanup_tx_ring(struct tun_file *tfile) -{ - if (tfile->tx_ring.queue) { - ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - xdp_rxq_info_unreg(&tfile->xdp_rxq); - memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); - } -} - static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -736,7 +727,9 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - tun_cleanup_tx_ring(tfile); + if (tun) + xdp_rxq_info_unreg(&tfile->xdp_rxq); + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); sock_put(&tfile->sk); } } @@ -783,14 +776,14 @@ static void tun_detach_all(struct net_device *dev) tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); - tun_cleanup_tx_ring(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); - tun_cleanup_tx_ring(tfile); } BUG_ON(tun->numdisabled != 0); @@ -834,7 +827,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, } if (!tfile->detached && - ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) { + ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len, + GFP_KERNEL, tun_ptr_free)) { err = -ENOMEM; goto out; } @@ -1429,6 +1423,13 @@ static void tun_net_init(struct net_device *dev) dev->max_mtu = MAX_MTU - dev->hard_header_len; } +static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) +{ + struct sock *sk = tfile->socket.sk; + + return (tun->dev->flags & IFF_UP) && sock_writeable(sk); +} + /* Character device part */ /* Poll */ @@ -1451,10 +1452,14 @@ static __poll_t tun_chr_poll(struct file *file, poll_table *wait) if (!ptr_ring_empty(&tfile->tx_ring)) mask |= EPOLLIN | EPOLLRDNORM; - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) + /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to + * guarantee EPOLLOUT to be raised by either here or + * tun_sock_write_space(). Then process could get notification + * after it writes to a down device and meets -EIO. + */ + if (tun_sock_writeable(tun, tfile) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + tun_sock_writeable(tun, tfile))) mask |= EPOLLOUT | EPOLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) @@ -1645,7 +1650,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, else *skb_xdp = 0; - preempt_disable(); + local_bh_disable(); rcu_read_lock(); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog && !*skb_xdp) { @@ -1670,7 +1675,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, if (err) goto err_redirect; rcu_read_unlock(); - preempt_enable(); + local_bh_enable(); return NULL; case XDP_TX: get_page(alloc_frag->page); @@ -1679,7 +1684,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, goto err_redirect; tun_xdp_flush(tun->dev); rcu_read_unlock(); - preempt_enable(); + local_bh_enable(); return NULL; case XDP_PASS: delta = orig_data - xdp.data; @@ -1698,7 +1703,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, skb = build_skb(buf, buflen); if (!skb) { rcu_read_unlock(); - preempt_enable(); + local_bh_enable(); return ERR_PTR(-ENOMEM); } @@ -1708,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, alloc_frag->offset += buflen; rcu_read_unlock(); - preempt_enable(); + local_bh_enable(); return skb; @@ -1716,7 +1721,7 @@ err_redirect: put_page(alloc_frag->page); err_xdp: rcu_read_unlock(); - preempt_enable(); + local_bh_enable(); this_cpu_inc(tun->pcpu_stats->rx_dropped); return NULL; } @@ -1912,16 +1917,19 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct bpf_prog *xdp_prog; int ret; + local_bh_disable(); rcu_read_lock(); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { ret = do_xdp_generic(xdp_prog, skb); if (ret != XDP_PASS) { rcu_read_unlock(); + local_bh_enable(); return total_len; } } rcu_read_unlock(); + local_bh_enable(); } rcu_read_lock(); @@ -3219,6 +3227,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) &tun_proto, 0); if (!tfile) return -ENOMEM; + if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { + sk_free(&tfile->sk); + return -ENOMEM; + } + RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; @@ -3239,8 +3252,6 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); - memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); - return 0; } diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 7220cd620717..0362acd5cdca 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -609,7 +609,7 @@ static const struct driver_info cdc_mbim_info_ndp_to_end = { */ static const struct driver_info cdc_mbim_info_avoid_altsetting_toggle = { .description = "CDC MBIM", - .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN, + .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN | FLAG_SEND_ZLP, .bind = cdc_mbim_bind, .unbind = cdc_mbim_unbind, .manage_power = cdc_mbim_manage_power, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 42565dd33aa6..094680871687 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1103,6 +1103,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, + {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 770422e953f7..032e1ac10a30 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -707,6 +707,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, void *data; u32 act; + /* Transient failure which in theory could occur if + * in-flight packets from before XDP was enabled reach + * the receive path after XDP is loaded. + */ + if (unlikely(hdr->hdr.gso_type)) + goto err_xdp; + /* This happens when rx buffer size is underestimated * or headroom is not enough because of the buffer * was refilled before XDP is set. This should only @@ -727,14 +734,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdp_page = page; } - /* Transient failure which in theory could occur if - * in-flight packets from before XDP was enabled reach - * the receive path after XDP is loaded. In practice I - * was not able to create this condition. - */ - if (unlikely(hdr->hdr.gso_type)) - goto err_xdp; - /* Allow consuming headroom but reserve enough space to push * the descriptor on if we get an XDP_TX return code. */ @@ -775,7 +774,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } *xdp_xmit = true; if (unlikely(xdp_page != page)) - goto err_xdp; + put_page(page); rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: @@ -787,7 +786,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } *xdp_xmit = true; if (unlikely(xdp_page != page)) - goto err_xdp; + put_page(page); rcu_read_unlock(); goto xdp_xmit; default: @@ -875,7 +874,7 @@ err_xdp: rcu_read_unlock(); err_skb: put_page(page); - while (--num_buf) { + while (num_buf-- > 1) { buf = virtqueue_get_buf(rq->vq, &len); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 9ebe2a689966..27a9bb8c9611 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq, gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) { + /* Prevent any &gdesc->tcd field from being (speculatively) + * read before (&gdesc->tcd)->gen is read. + */ + dma_rmb(); + completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX( &gdesc->tcd), tq, adapter->pdev, adapter); @@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, gdesc->txd.tci = skb_vlan_tag_get(skb); } + /* Ensure that the write to (&gdesc->txd)->gen will be observed after + * all other writes to &gdesc->txd. + */ + dma_wmb(); + /* finally flips the GEN bit of the SOP desc. */ gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); @@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, */ break; } + + /* Prevent any rcd field from being (speculatively) read before + * rcd->gen is read. + */ + dma_rmb(); + BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 && rcd->rqID != rq->dataRingQid); idx = rcd->rxdIdx; @@ -1528,6 +1544,12 @@ rcd_done: ring->next2comp = idx; num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); ring = rq->rx_ring + ring_idx; + + /* Ensure that the writes to rxd->gen bits will be observed + * after all other writes to rxd objects. + */ + dma_wmb(); + while (num_to_alloc) { vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, &rxCmdDesc); @@ -2688,7 +2710,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p) /* ==================== initialization and cleanup routines ============ */ static int -vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) +vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter) { int err; unsigned long mmio_start, mmio_len; @@ -2700,30 +2722,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) return err; } - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { - dev_err(&pdev->dev, - "pci_set_consistent_dma_mask failed\n"); - err = -EIO; - goto err_set_mask; - } - *dma64 = true; - } else { - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { - dev_err(&pdev->dev, - "pci_set_dma_mask failed\n"); - err = -EIO; - goto err_set_mask; - } - *dma64 = false; - } - err = pci_request_selected_regions(pdev, (1 << 2) - 1, vmxnet3_driver_name); if (err) { dev_err(&pdev->dev, "Failed to request region for adapter: error %d\n", err); - goto err_set_mask; + goto err_enable_device; } pci_set_master(pdev); @@ -2751,7 +2755,7 @@ err_bar1: iounmap(adapter->hw_addr0); err_ioremap: pci_release_selected_regions(pdev, (1 << 2) - 1); -err_set_mask: +err_enable_device: pci_disable_device(pdev); return err; } @@ -3254,7 +3258,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, #endif }; int err; - bool dma64 = false; /* stupid gcc */ + bool dma64; u32 ver; struct net_device *netdev; struct vmxnet3_adapter *adapter; @@ -3300,6 +3304,24 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE; adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE; + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { + dev_err(&pdev->dev, + "pci_set_consistent_dma_mask failed\n"); + err = -EIO; + goto err_set_mask; + } + dma64 = true; + } else { + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { + dev_err(&pdev->dev, + "pci_set_dma_mask failed\n"); + err = -EIO; + goto err_set_mask; + } + dma64 = false; + } + spin_lock_init(&adapter->cmd_lock); adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, sizeof(struct vmxnet3_adapter), @@ -3307,7 +3329,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { dev_err(&pdev->dev, "Failed to map dma\n"); err = -EFAULT; - goto err_dma_map; + goto err_set_mask; } adapter->shared = dma_alloc_coherent( &adapter->pdev->dev, @@ -3358,7 +3380,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, } #endif /* VMXNET3_RSS */ - err = vmxnet3_alloc_pci_resources(adapter, &dma64); + err = vmxnet3_alloc_pci_resources(adapter); if (err < 0) goto err_alloc_pci; @@ -3504,7 +3526,7 @@ err_alloc_queue_desc: err_alloc_shared: dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); -err_dma_map: +err_set_mask: free_netdev(netdev); return err; } diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index a3326463b71f..a2c554f8a61b 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,12 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.14.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.16.0-k" -/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040e00 +/* Each byte of this 32-bit integer encodes a version number in + * VMXNET3_DRIVER_VERSION_STRING. + */ +#define VMXNET3_DRIVER_VERSION_NUM 0x01041000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index d122386c382b..b01dc34d55af 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -1482,18 +1482,6 @@ static int atmel_proc_show(struct seq_file *m, void *v) return 0; } -static int atmel_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, atmel_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations atmel_proc_fops = { - .open = atmel_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct net_device_ops atmel_netdev_ops = { .ndo_open = atmel_open, .ndo_stop = atmel_close, @@ -1614,7 +1602,8 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port, netif_carrier_off(dev); - if (!proc_create_data("driver/atmel", 0, NULL, &atmel_proc_fops, priv)) + if (!proc_create_single_data("driver/atmel", 0, NULL, atmel_proc_show, + priv)) printk(KERN_WARNING "atmel: unable to create /proc entry.\n"); printk(KERN_INFO "%s: Atmel at76c50x. Version %d.%d. MAC %pM\n", diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index f8a0234d332c..5517ea4c2aa0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1590,14 +1590,13 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int max_irqs, num_irqs, i, ret, nr_online_cpus; + int max_irqs, num_irqs, i, ret; u16 pci_cmd; if (!trans->cfg->mq_rx_supported) goto enable_msi; - nr_online_cpus = num_online_cpus(); - max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES); + max_irqs = min_t(u32, num_online_cpus() + 2, IWL_MAX_RX_HW_QUEUES); for (i = 0; i < max_irqs; i++) trans_pcie->msix_entries[i].entry = i; @@ -1623,16 +1622,17 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, * Two interrupts less: non rx causes shared with FBQ and RSS. * More than two interrupts: we will use fewer RSS queues. */ - if (num_irqs <= nr_online_cpus) { + if (num_irqs <= max_irqs - 2) { trans_pcie->trans->num_rx_queues = num_irqs + 1; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | IWL_SHARED_IRQ_FIRST_RSS; - } else if (num_irqs == nr_online_cpus + 1) { + } else if (num_irqs == max_irqs - 1) { trans_pcie->trans->num_rx_queues = num_irqs; trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; } else { trans_pcie->trans->num_rx_queues = num_irqs - 1; } + WARN_ON(trans_pcie->trans->num_rx_queues > IWL_MAX_RX_HW_QUEUES); trans_pcie->alloc_vecs = num_irqs; trans_pcie->msix_enabled = true; diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index b4dfe1893d18..d1884b8913e7 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -69,7 +69,7 @@ static void prism2_send_mgmt(struct net_device *dev, #ifndef PRISM2_NO_PROCFS_DEBUG static int ap_debug_proc_show(struct seq_file *m, void *v) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast); seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast); @@ -81,18 +81,6 @@ static int ap_debug_proc_show(struct seq_file *m, void *v) seq_printf(m, "tx_drop_nonassoc=%u\n", ap->tx_drop_nonassoc); return 0; } - -static int ap_debug_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ap_debug_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ap_debug_proc_fops = { - .open = ap_debug_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* PRISM2_NO_PROCFS_DEBUG */ @@ -333,7 +321,7 @@ void hostap_deauth_all_stas(struct net_device *dev, struct ap_data *ap, static int ap_control_proc_show(struct seq_file *m, void *v) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); char *policy_txt; struct mac_entry *entry; @@ -365,20 +353,20 @@ static int ap_control_proc_show(struct seq_file *m, void *v) static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); spin_lock_bh(&ap->mac_restrictions.lock); return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos); } static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos); } static void ap_control_proc_stop(struct seq_file *m, void *v) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); spin_unlock_bh(&ap->mac_restrictions.lock); } @@ -389,24 +377,6 @@ static const struct seq_operations ap_control_proc_seqops = { .show = ap_control_proc_show, }; -static int ap_control_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &ap_control_proc_seqops); - if (ret == 0) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations ap_control_proc_fops = { - .open = ap_control_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - - int ap_control_add_mac(struct mac_restrictions *mac_restrictions, u8 *mac) { struct mac_entry *entry; @@ -585,20 +555,20 @@ static int prism2_ap_proc_show(struct seq_file *m, void *v) static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); spin_lock_bh(&ap->sta_table_lock); return seq_list_start_head(&ap->sta_list, *_pos); } static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); return seq_list_next(v, &ap->sta_list, _pos); } static void prism2_ap_proc_stop(struct seq_file *m, void *v) { - struct ap_data *ap = m->private; + struct ap_data *ap = PDE_DATA(file_inode(m->file)); spin_unlock_bh(&ap->sta_table_lock); } @@ -608,23 +578,6 @@ static const struct seq_operations prism2_ap_proc_seqops = { .stop = prism2_ap_proc_stop, .show = prism2_ap_proc_show, }; - -static int prism2_ap_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &prism2_ap_proc_seqops); - if (ret == 0) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations prism2_ap_proc_fops = { - .open = prism2_ap_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ @@ -896,12 +849,13 @@ void hostap_init_ap_proc(local_info_t *local) return; #ifndef PRISM2_NO_PROCFS_DEBUG - proc_create_data("ap_debug", 0, ap->proc, &ap_debug_proc_fops, ap); + proc_create_single_data("ap_debug", 0, ap->proc, ap_debug_proc_show, ap); #endif /* PRISM2_NO_PROCFS_DEBUG */ #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT - proc_create_data("ap_control", 0, ap->proc, &ap_control_proc_fops, ap); - proc_create_data("ap", 0, ap->proc, &prism2_ap_proc_fops, ap); + proc_create_seq_data("ap_control", 0, ap->proc, &ap_control_proc_seqops, + ap); + proc_create_seq_data("ap", 0, ap->proc, &prism2_ap_proc_seqops, ap); #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ } @@ -1106,18 +1060,6 @@ static int prism2_sta_proc_show(struct seq_file *m, void *v) return 0; } -static int prism2_sta_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prism2_sta_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations prism2_sta_proc_fops = { - .open = prism2_sta_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void handle_add_proc_queue(struct work_struct *work) { struct ap_data *ap = container_of(work, struct ap_data, @@ -1138,9 +1080,9 @@ static void handle_add_proc_queue(struct work_struct *work) if (sta) { sprintf(name, "%pM", sta->addr); - sta->proc = proc_create_data( + sta->proc = proc_create_single_data( name, 0, ap->proc, - &prism2_sta_proc_fops, sta); + prism2_sta_proc_show, sta); atomic_dec(&sta->users); } diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 5c4a17a18968..2720aa39f530 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -2951,19 +2951,6 @@ static int prism2_registers_proc_show(struct seq_file *m, void *v) return 0; } - -static int prism2_registers_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prism2_registers_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations prism2_registers_proc_fops = { - .open = prism2_registers_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #endif /* PRISM2_NO_PROCFS_DEBUG */ @@ -3279,8 +3266,8 @@ static int hostap_hw_ready(struct net_device *dev) } hostap_init_proc(local); #ifndef PRISM2_NO_PROCFS_DEBUG - proc_create_data("registers", 0, local->proc, - &prism2_registers_proc_fops, local); + proc_create_single_data("registers", 0, local->proc, + prism2_registers_proc_show, local); #endif /* PRISM2_NO_PROCFS_DEBUG */ hostap_init_ap_proc(local); return 0; diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c index d234231bf532..5b33ccab9188 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_proc.c +++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c @@ -43,18 +43,6 @@ static int prism2_debug_proc_show(struct seq_file *m, void *v) return 0; } - -static int prism2_debug_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prism2_debug_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations prism2_debug_proc_fops = { - .open = prism2_debug_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* PRISM2_NO_PROCFS_DEBUG */ @@ -95,19 +83,6 @@ static int prism2_stats_proc_show(struct seq_file *m, void *v) return 0; } -static int prism2_stats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prism2_stats_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations prism2_stats_proc_fops = { - .open = prism2_stats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - static int prism2_wds_proc_show(struct seq_file *m, void *v) { struct list_head *ptr = v; @@ -122,20 +97,20 @@ static int prism2_wds_proc_show(struct seq_file *m, void *v) static void *prism2_wds_proc_start(struct seq_file *m, loff_t *_pos) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); read_lock_bh(&local->iface_lock); return seq_list_start(&local->hostap_interfaces, *_pos); } static void *prism2_wds_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); return seq_list_next(v, &local->hostap_interfaces, _pos); } static void prism2_wds_proc_stop(struct seq_file *m, void *v) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); read_unlock_bh(&local->iface_lock); } @@ -146,27 +121,9 @@ static const struct seq_operations prism2_wds_proc_seqops = { .show = prism2_wds_proc_show, }; -static int prism2_wds_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &prism2_wds_proc_seqops); - if (ret == 0) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations prism2_wds_proc_fops = { - .open = prism2_wds_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - - static int prism2_bss_list_proc_show(struct seq_file *m, void *v) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); struct list_head *ptr = v; struct hostap_bss_info *bss; @@ -193,20 +150,20 @@ static int prism2_bss_list_proc_show(struct seq_file *m, void *v) static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); spin_lock_bh(&local->lock); return seq_list_start_head(&local->bss_list, *_pos); } static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); return seq_list_next(v, &local->bss_list, _pos); } static void prism2_bss_list_proc_stop(struct seq_file *m, void *v) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); spin_unlock_bh(&local->lock); } @@ -217,24 +174,6 @@ static const struct seq_operations prism2_bss_list_proc_seqops = { .show = prism2_bss_list_proc_show, }; -static int prism2_bss_list_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &prism2_bss_list_proc_seqops); - if (ret == 0) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations prism2_bss_list_proc_fops = { - .open = prism2_bss_list_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - - static int prism2_crypt_proc_show(struct seq_file *m, void *v) { local_info_t *local = m->private; @@ -252,19 +191,6 @@ static int prism2_crypt_proc_show(struct seq_file *m, void *v) return 0; } -static int prism2_crypt_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prism2_crypt_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations prism2_crypt_proc_fops = { - .open = prism2_crypt_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - static ssize_t prism2_pda_proc_read(struct file *file, char __user *buf, size_t count, loff_t *_pos) { @@ -342,7 +268,7 @@ static int prism2_io_debug_proc_read(char *page, char **start, off_t off, #ifndef PRISM2_NO_STATION_MODES static int prism2_scan_results_proc_show(struct seq_file *m, void *v) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); unsigned long entry; int i, len; struct hfa384x_hostscan_result *scanres; @@ -392,7 +318,7 @@ static int prism2_scan_results_proc_show(struct seq_file *m, void *v) static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); spin_lock_bh(&local->lock); /* We have a header (pos 0) + N results to show (pos 1...N) */ @@ -403,7 +329,7 @@ static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos) static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); ++*_pos; if (*_pos > local->last_scan_results_count) @@ -413,7 +339,7 @@ static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t * static void prism2_scan_results_proc_stop(struct seq_file *m, void *v) { - local_info_t *local = m->private; + local_info_t *local = PDE_DATA(file_inode(m->file)); spin_unlock_bh(&local->lock); } @@ -423,25 +349,6 @@ static const struct seq_operations prism2_scan_results_proc_seqops = { .stop = prism2_scan_results_proc_stop, .show = prism2_scan_results_proc_show, }; - -static int prism2_scan_results_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &prism2_scan_results_proc_seqops); - if (ret == 0) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations prism2_scan_results_proc_fops = { - .open = prism2_scan_results_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - - #endif /* PRISM2_NO_STATION_MODES */ @@ -463,29 +370,29 @@ void hostap_init_proc(local_info_t *local) } #ifndef PRISM2_NO_PROCFS_DEBUG - proc_create_data("debug", 0, local->proc, - &prism2_debug_proc_fops, local); + proc_create_single_data("debug", 0, local->proc, + prism2_debug_proc_show, local); #endif /* PRISM2_NO_PROCFS_DEBUG */ - proc_create_data("stats", 0, local->proc, - &prism2_stats_proc_fops, local); - proc_create_data("wds", 0, local->proc, - &prism2_wds_proc_fops, local); + proc_create_single_data("stats", 0, local->proc, prism2_stats_proc_show, + local); + proc_create_seq_data("wds", 0, local->proc, + &prism2_wds_proc_seqops, local); proc_create_data("pda", 0, local->proc, &prism2_pda_proc_fops, local); proc_create_data("aux_dump", 0, local->proc, local->func->read_aux_fops ?: &prism2_aux_dump_proc_fops, local); - proc_create_data("bss_list", 0, local->proc, - &prism2_bss_list_proc_fops, local); - proc_create_data("crypt", 0, local->proc, - &prism2_crypt_proc_fops, local); + proc_create_seq_data("bss_list", 0, local->proc, + &prism2_bss_list_proc_seqops, local); + proc_create_single_data("crypt", 0, local->proc, prism2_crypt_proc_show, + local); #ifdef PRISM2_IO_DEBUG - proc_create_data("io_debug", 0, local->proc, - &prism2_io_debug_proc_fops, local); + proc_create_single_data("io_debug", 0, local->proc, + prism2_debug_proc_show, local); #endif /* PRISM2_IO_DEBUG */ #ifndef PRISM2_NO_STATION_MODES - proc_create_data("scan_results", 0, local->proc, - &prism2_scan_results_proc_fops, local); + proc_create_seq_data("scan_results", 0, local->proc, + &prism2_scan_results_proc_seqops, local); #endif /* PRISM2_NO_STATION_MODES */ } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4a017a0d71ea..920c23e542a5 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3340,7 +3340,7 @@ out_err: static int hwsim_dump_radio_nl(struct sk_buff *skb, struct netlink_callback *cb) { - int last_idx = cb->args[0]; + int last_idx = cb->args[0] - 1; struct mac80211_hwsim_data *data = NULL; int res = 0; void *hdr; @@ -3368,7 +3368,7 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb, last_idx = data->idx; } - cb->args[0] = last_idx; + cb->args[0] = last_idx + 1; /* list changed, but no new element sent, set interrupted flag */ if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) { diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index a6884e73d2ab..7ddee980048b 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -372,16 +372,15 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev, /* * Determine IFS values - * - Use TXOP_BACKOFF for probe and management frames except beacons + * - Use TXOP_BACKOFF for management frames except beacons * - Use TXOP_SIFS for fragment bursts * - Use TXOP_HTTXOP for everything else * * Note: rt2800 devices won't use CTS protection (if used) * for frames not transmitted with TXOP_HTTXOP */ - if ((ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_beacon(hdr->frame_control)) || - (tx_info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)) + if (ieee80211_is_mgmt(hdr->frame_control) && + !ieee80211_is_beacon(hdr->frame_control)) txdesc->u.ht.txop = TXOP_BACKOFF; else if (!(tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)) txdesc->u.ht.txop = TXOP_SIFS; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 7f9b16b97ea3..a7e0a17aa7e8 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -2663,19 +2663,6 @@ static int ray_cs_proc_show(struct seq_file *m, void *v) } return 0; } - -static int ray_cs_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ray_cs_proc_show, NULL); -} - -static const struct file_operations ray_cs_proc_fops = { - .owner = THIS_MODULE, - .open = ray_cs_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /*===========================================================================*/ static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type) @@ -2814,7 +2801,7 @@ static int __init init_ray_cs(void) #ifdef CONFIG_PROC_FS proc_mkdir("driver/ray_cs", NULL); - proc_create("driver/ray_cs/ray_cs", 0, NULL, &ray_cs_proc_fops); + proc_create_single("driver/ray_cs/ray_cs", 0, NULL, ray_cs_proc_show); proc_create("driver/ray_cs/essid", 0200, NULL, &ray_cs_essid_proc_fops); proc_create_data("driver/ray_cs/net_type", 0200, NULL, &int_proc_fops, &net_type); diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index c2e5a7e6bd3e..88e1f9a0faaf 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -45,18 +45,6 @@ nubus_devices_proc_show(struct seq_file *m, void *v) return 0; } -static int nubus_devices_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, nubus_devices_proc_show, NULL); -} - -static const struct file_operations nubus_devices_proc_fops = { - .open = nubus_devices_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static struct proc_dir_entry *proc_bus_nubus_dir; /* @@ -149,18 +137,6 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v) return 0; } -static int nubus_proc_rsrc_open(struct inode *inode, struct file *file) -{ - return single_open(file, nubus_proc_rsrc_show, inode); -} - -static const struct file_operations nubus_proc_rsrc_fops = { - .open = nubus_proc_rsrc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, const struct nubus_dirent *ent, unsigned int size) @@ -176,8 +152,8 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size); else pde_data = NULL; - proc_create_data(name, S_IFREG | 0444, procdir, - &nubus_proc_rsrc_fops, pde_data); + proc_create_single_data(name, S_IFREG | 0444, procdir, + nubus_proc_rsrc_show, pde_data); } void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, @@ -190,32 +166,21 @@ void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, return; snprintf(name, sizeof(name), "%x", ent->type); - proc_create_data(name, S_IFREG | 0444, procdir, - &nubus_proc_rsrc_fops, - nubus_proc_alloc_pde_data(data, 0)); + proc_create_single_data(name, S_IFREG | 0444, procdir, + nubus_proc_rsrc_show, + nubus_proc_alloc_pde_data(data, 0)); } /* * /proc/nubus stuff */ -static int nubus_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, nubus_proc_show, NULL); -} - -static const struct file_operations nubus_proc_fops = { - .open = nubus_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void __init nubus_proc_init(void) { - proc_create("nubus", 0, NULL, &nubus_proc_fops); + proc_create_single("nubus", 0, NULL, nubus_proc_show); proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL); if (!proc_bus_nubus_dir) return; - proc_create("devices", 0, proc_bus_nubus_dir, &nubus_devices_proc_fops); + proc_create_single("devices", 0, proc_bus_nubus_dir, + nubus_devices_proc_show); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 99b857e5a7a9..04a20da76786 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -101,6 +101,15 @@ static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); static void nvme_put_subsystem(struct nvme_subsystem *subsys); +static void nvme_queue_scan(struct nvme_ctrl *ctrl) +{ + /* + * Only new queue scan work when admin and IO queues are both alive + */ + if (ctrl->state == NVME_CTRL_LIVE) + queue_work(nvme_wq, &ctrl->scan_work); +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -244,9 +253,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq); void nvme_cancel_request(struct request *req, void *data, bool reserved) { - if (!blk_mq_request_started(req)) - return; - dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); @@ -1033,6 +1039,21 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) } EXPORT_SYMBOL_GPL(nvme_set_queue_count); +#define NVME_AEN_SUPPORTED \ + (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT) + +static void nvme_enable_aen(struct nvme_ctrl *ctrl) +{ + u32 result; + int status; + + status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, + ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result); + if (status) + dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", + ctrl->oaes & NVME_AEN_SUPPORTED); +} + static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_user_io io; @@ -1351,13 +1372,19 @@ static void nvme_set_chunk_size(struct nvme_ns *ns) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } -static void nvme_config_discard(struct nvme_ctrl *ctrl, - unsigned stream_alignment, struct request_queue *queue) +static void nvme_config_discard(struct nvme_ns *ns) { + struct nvme_ctrl *ctrl = ns->ctrl; + struct request_queue *queue = ns->queue; u32 size = queue_logical_block_size(queue); - if (stream_alignment) - size *= stream_alignment; + if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); + return; + } + + if (ctrl->nr_streams && ns->sws && ns->sgs) + size *= ns->sws * ns->sgs; BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); @@ -1365,9 +1392,12 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; + /* If discard is already enabled, don't reset queue limits */ + if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) + return; + blk_queue_max_discard_sectors(queue, UINT_MAX); blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -1411,10 +1441,6 @@ static void nvme_update_disk_info(struct gendisk *disk, { sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); unsigned short bs = 1 << ns->lba_shift; - unsigned stream_alignment = 0; - - if (ns->ctrl->nr_streams && ns->sws && ns->sgs) - stream_alignment = ns->sws * ns->sgs; blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); @@ -1428,10 +1454,9 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_init_integrity(disk, ns->ms, ns->pi_type); if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) capacity = 0; - set_capacity(disk, capacity); - if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) - nvme_config_discard(ns->ctrl, stream_alignment, disk->queue); + set_capacity(disk, capacity); + nvme_config_discard(ns); blk_mq_unfreeze_queue(disk->queue); } @@ -1447,8 +1472,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->lba_shift == 0) ns->lba_shift = 9; ns->noiob = le16_to_cpu(id->noiob); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; @@ -1577,7 +1602,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -1589,7 +1614,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } @@ -2183,7 +2208,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { + if (!ctrl->opts->discovery_nqn && + nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); @@ -2314,7 +2340,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { ret = nvme_get_effects_log(ctrl); if (ret < 0) - return ret; + goto out_free; } if (!ctrl->identified) { @@ -2345,6 +2371,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oncs = le16_to_cpup(&id->oncs); + ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; ctrl->cntlid = le16_to_cpup(&id->cntlid); @@ -3170,6 +3197,42 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) nvme_remove_invalid_namespaces(ctrl, nn); } +static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl) +{ + size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); + __le32 *log; + int error, i; + bool ret = false; + + log = kzalloc(log_size, GFP_KERNEL); + if (!log) + return false; + + error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); + if (error) { + dev_warn(ctrl->device, + "reading changed ns log failed: %d\n", error); + goto out_free_log; + } + + if (log[0] == cpu_to_le32(0xffffffff)) + goto out_free_log; + + for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) { + u32 nsid = le32_to_cpu(log[i]); + + if (nsid == 0) + break; + dev_info(ctrl->device, "rescanning namespace %d.\n", nsid); + nvme_validate_ns(ctrl, nsid); + } + ret = true; + +out_free_log: + kfree(log); + return ret; +} + static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = @@ -3182,6 +3245,12 @@ static void nvme_scan_work(struct work_struct *work) WARN_ON_ONCE(!ctrl->tagset); + if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) { + if (nvme_scan_changed_ns_log(ctrl)) + goto out_sort_namespaces; + dev_info(ctrl->device, "rescanning namespaces.\n"); + } + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -3189,26 +3258,17 @@ static void nvme_scan_work(struct work_struct *work) if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { if (!nvme_scan_ns_list(ctrl, nn)) - goto done; + goto out_free_id; } nvme_scan_ns_sequential(ctrl, nn); - done: +out_free_id: + kfree(id); +out_sort_namespaces: down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); up_write(&ctrl->namespaces_rwsem); - kfree(id); } -void nvme_queue_scan(struct nvme_ctrl *ctrl) -{ - /* - * Only new queue scan work when admin and IO queues are both alive - */ - if (ctrl->state == NVME_CTRL_LIVE) - queue_work(nvme_wq, &ctrl->scan_work); -} -EXPORT_SYMBOL_GPL(nvme_queue_scan); - /* * This function iterates the namespace list unlocked to allow recovery from * controller failure. It is up to the caller to ensure the namespace list is @@ -3322,8 +3382,23 @@ static void nvme_fw_act_work(struct work_struct *work) nvme_get_fw_slot_info(ctrl); } +static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) +{ + switch ((result & 0xff00) >> 8) { + case NVME_AER_NOTICE_NS_CHANGED: + set_bit(EVENT_NS_CHANGED, &ctrl->events); + nvme_queue_scan(ctrl); + break; + case NVME_AER_NOTICE_FW_ACT_STARTING: + queue_work(nvme_wq, &ctrl->fw_act_work); + break; + default: + dev_warn(ctrl->device, "async event result %08x\n", result); + } +} + void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, - union nvme_result *res) + volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); @@ -3331,6 +3406,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, return; switch (result & 0x7) { + case NVME_AER_NOTICE: + nvme_handle_aen_notice(ctrl, result); + break; case NVME_AER_ERROR: case NVME_AER_SMART: case NVME_AER_CSS: @@ -3340,18 +3418,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, default: break; } - - switch (result & 0xff07) { - case NVME_AER_NOTICE_NS_CHANGED: - dev_info(ctrl->device, "rescanning\n"); - nvme_queue_scan(ctrl); - break; - case NVME_AER_NOTICE_FW_ACT_STARTING: - queue_work(nvme_wq, &ctrl->fw_act_work); - break; - default: - dev_warn(ctrl->device, "async event result %08x\n", result); - } queue_work(nvme_wq, &ctrl->async_event_work); } EXPORT_SYMBOL_GPL(nvme_complete_async_event); @@ -3374,6 +3440,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); + nvme_enable_aen(ctrl); queue_work(nvme_wq, &ctrl->async_event_work); nvme_start_queues(ctrl); } diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae732a77fe8..5f5f7067c41d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) goto out_unlock; kref_init(&host->ref); - memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -545,71 +545,54 @@ blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, return BLK_STS_OK; switch (ctrl->state) { - case NVME_CTRL_DELETING: - goto reject_io; - case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: + case NVME_CTRL_DELETING: + /* + * This is the case of starting a new or deleting an association + * but connectivity was lost before it was fully created or torn + * down. We need to error the commands used to initialize the + * controller so the reconnect can go into a retry attempt. The + * commands should all be marked REQ_FAILFAST_DRIVER, which will + * hit the reject path below. Anything else will be queued while + * the state settles. + */ if (!is_connected) - /* - * This is the case of starting a new - * association but connectivity was lost - * before it was fully created. We need to - * error the commands used to initialize the - * controller so the reconnect can go into a - * retry attempt. The commands should all be - * marked REQ_FAILFAST_DRIVER, which will hit - * the reject path below. Anything else will - * be queued while the state settles. - */ - goto reject_or_queue_io; - - if ((queue_live && - !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) || - (!queue_live && blk_rq_is_passthrough(rq) && - cmd->common.opcode == nvme_fabrics_command && - cmd->fabrics.fctype == nvme_fabrics_type_connect)) - /* - * If queue is live, allow only commands that - * are internally generated pass through. These - * are commands on the admin queue to initialize - * the controller. This will reject any ioctl - * admin cmds received while initializing. - * - * If the queue is not live, allow only a - * connect command. This will reject any ioctl - * admin cmd as well as initialization commands - * if the controller reverted the queue to non-live. - */ + break; + + /* + * If queue is live, allow only commands that are internally + * generated pass through. These are commands on the admin + * queue to initialize the controller. This will reject any + * ioctl admin cmds received while initializing. + */ + if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) return BLK_STS_OK; /* - * fall-thru to the reject_or_queue_io clause + * If the queue is not live, allow only a connect command. This + * will reject any ioctl admin cmd as well as initialization + * commands if the controller reverted the queue to non-live. */ + if (!queue_live && blk_rq_is_passthrough(rq) && + cmd->common.opcode == nvme_fabrics_command && + cmd->fabrics.fctype == nvme_fabrics_type_connect) + return BLK_STS_OK; break; - - /* these cases fall-thru - * case NVME_CTRL_LIVE: - * case NVME_CTRL_RESETTING: - */ default: break; } -reject_or_queue_io: /* - * Any other new io is something we're not in a state to send - * to the device. Default action is to busy it and retry it - * after the controller state is recovered. However, anything - * marked for failfast or nvme multipath is immediately failed. - * Note: commands used to initialize the controller will be - * marked for failfast. + * Any other new io is something we're not in a state to send to the + * device. Default action is to busy it and retry it after the + * controller state is recovered. However, anything marked for failfast + * or nvme multipath is immediately failed. Note: commands used to + * initialize the controller will be marked for failfast. * Note: nvme cli/ioctl commands are marked for failfast. */ if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - -reject_io: nvme_req(rq)->status = NVME_SC_ABORT_REQ; return BLK_STS_IOERR; } @@ -689,10 +672,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) { - opts->kato = 0; - opts->nr_io_queues = 0; - } break; case NVMF_OPT_TRADDR: p = match_strdup(args); @@ -851,6 +830,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (opts->discovery_nqn) { + opts->kato = 0; + opts->nr_io_queues = 0; + opts->duplicate_connect = true; + } if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; else @@ -983,16 +967,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_module_put; } - if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { - dev_warn(ctrl->device, - "controller returned incorrect NQN: \"%s\".\n", - ctrl->subsys->subnqn); - module_put(ops->module); - up_read(&nvmf_transports_rwsem); - nvme_delete_ctrl_sync(ctrl); - return ERR_PTR(-EINVAL); - } - module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index ef46c915b7b5..0cf0460a5c92 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -139,7 +139,9 @@ static inline bool nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || + if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DEAD || + strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t))) return false; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6cb26bcf6ec0..0bad65803271 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1686,16 +1686,6 @@ done: goto check_error; } - /* - * Force failures of commands if we're killing the controller - * or have an error on a command used to create an new association - */ - if (status && - (blk_queue_dying(rq->q) || - ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_CONNECTING)) - status |= cpu_to_le16(NVME_SC_DNR << 1); - __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); nvme_end_request(rq, status, result); @@ -2403,9 +2393,6 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - if (!blk_mq_request_started(req)) - return; - __nvme_fc_abort_op(ctrl, op); } @@ -3284,6 +3271,8 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) } spin_unlock_irqrestore(&nvme_fc_lock, flags); + pr_warn("%s: %s - %s combination not found\n", + __func__, opts->traddr, opts->host_traddr); return ERR_PTR(-ENOENT); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 17d2f7cf3fed..de24fe77c80b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -22,6 +22,7 @@ #include <linux/lightnvm.h> #include <linux/sed-opal.h> #include <linux/fault-inject.h> +#include <linux/rcupdate.h> extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -180,6 +181,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u32 oaes; u32 aen_result; unsigned int shutdown_timeout; unsigned int kato; @@ -192,6 +194,8 @@ struct nvme_ctrl { struct delayed_work ka_work; struct nvme_command ka_cmd; struct work_struct fw_act_work; +#define EVENT_NS_CHANGED (1 << 0) + unsigned long events; /* Power saving configuration */ u64 ps_max_latency_us; @@ -398,14 +402,13 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl); void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); -void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send); void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, - union nvme_result *res); + volatile union nvme_result *res); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); @@ -454,7 +457,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - if (head && ns == srcu_dereference(head->current_path, &head->srcu)) + if (head && ns == rcu_access_pointer(head->current_path)) rcu_assign_pointer(head->current_path, NULL); } struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 17a0190bd88f..e526437bacbf 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -13,6 +13,7 @@ */ #include <linux/aer.h> +#include <linux/async.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/blk-mq-pci.h> @@ -68,7 +69,6 @@ MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); struct nvme_dev; struct nvme_queue; -static void nvme_process_cq(struct nvme_queue *nvmeq); static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); /* @@ -147,9 +147,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) struct nvme_queue { struct device *q_dmadev; struct nvme_dev *dev; - spinlock_t q_lock; + spinlock_t sq_lock; struct nvme_command *sq_cmds; struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; @@ -159,9 +160,9 @@ struct nvme_queue { s16 cq_vector; u16 sq_tail; u16 cq_head; + u16 last_cq_head; u16 qid; u8 cq_phase; - u8 cqe_seen; u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; @@ -420,28 +421,25 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) } /** - * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell + * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send - * - * Safe to use from interrupt context */ -static void __nvme_submit_cmd(struct nvme_queue *nvmeq, - struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) { - u16 tail = nvmeq->sq_tail; - + spin_lock(&nvmeq->sq_lock); if (nvmeq->sq_cmds_io) - memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd)); + memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd, + sizeof(*cmd)); else - memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); + memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++tail == nvmeq->q_depth) - tail = 0; - if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, - nvmeq->dbbuf_sq_ei)) - writel(tail, nvmeq->q_db); - nvmeq->sq_tail = tail; + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + spin_unlock(&nvmeq->sq_lock); } static void **nvme_pci_iod_list(struct request *req) @@ -872,6 +870,13 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command cmnd; blk_status_t ret; + /* + * We should not need to do this, but we're still using this to + * ensure we can drain requests on a dying queue. + */ + if (unlikely(nvmeq->cq_vector < 0)) + return BLK_STS_IOERR; + ret = nvme_setup_cmd(ns, req, &cmnd); if (ret) return ret; @@ -887,16 +892,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - - spin_lock_irq(&nvmeq->q_lock); - if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_STS_IOERR; - spin_unlock_irq(&nvmeq->q_lock); - goto out_cleanup_iod; - } - __nvme_submit_cmd(nvmeq, &cmnd); - nvme_process_cq(nvmeq); - spin_unlock_irq(&nvmeq->q_lock); + nvme_submit_cmd(nvmeq, &cmnd); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -914,10 +910,10 @@ static void nvme_pci_complete_rq(struct request *req) } /* We read the CQE phase first to check if the rest of the entry is valid */ -static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, - u16 phase) +static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { - return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; + return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == + nvmeq->cq_phase; } static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) @@ -931,9 +927,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) } } -static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, - struct nvme_completion *cqe) +static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { + volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; if (unlikely(cqe->command_id >= nvmeq->q_depth)) { @@ -956,83 +952,87 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, return; } - nvmeq->cqe_seen = 1; req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); nvme_end_request(req, cqe->status, cqe->result); } -static inline bool nvme_read_cqe(struct nvme_queue *nvmeq, - struct nvme_completion *cqe) +static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) { - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { - *cqe = nvmeq->cqes[nvmeq->cq_head]; + while (start != end) { + nvme_handle_cqe(nvmeq, start); + if (++start == nvmeq->q_depth) + start = 0; + } +} - if (++nvmeq->cq_head == nvmeq->q_depth) { - nvmeq->cq_head = 0; - nvmeq->cq_phase = !nvmeq->cq_phase; - } - return true; +static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) +{ + if (++nvmeq->cq_head == nvmeq->q_depth) { + nvmeq->cq_head = 0; + nvmeq->cq_phase = !nvmeq->cq_phase; } - return false; } -static void nvme_process_cq(struct nvme_queue *nvmeq) +static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, + u16 *end, int tag) { - struct nvme_completion cqe; - int consumed = 0; + bool found = false; - while (nvme_read_cqe(nvmeq, &cqe)) { - nvme_handle_cqe(nvmeq, &cqe); - consumed++; + *start = nvmeq->cq_head; + while (!found && nvme_cqe_pending(nvmeq)) { + if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) + found = true; + nvme_update_cq_head(nvmeq); } + *end = nvmeq->cq_head; - if (consumed) + if (*start != *end) nvme_ring_cq_doorbell(nvmeq); + return found; } static irqreturn_t nvme_irq(int irq, void *data) { - irqreturn_t result; struct nvme_queue *nvmeq = data; - spin_lock(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE; - nvmeq->cqe_seen = 0; - spin_unlock(&nvmeq->q_lock); - return result; + irqreturn_t ret = IRQ_NONE; + u16 start, end; + + spin_lock(&nvmeq->cq_lock); + if (nvmeq->cq_head != nvmeq->last_cq_head) + ret = IRQ_HANDLED; + nvme_process_cq(nvmeq, &start, &end, -1); + nvmeq->last_cq_head = nvmeq->cq_head; + spin_unlock(&nvmeq->cq_lock); + + if (start != end) { + nvme_complete_cqes(nvmeq, start, end); + return IRQ_HANDLED; + } + + return ret; } static irqreturn_t nvme_irq_check(int irq, void *data) { struct nvme_queue *nvmeq = data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + if (nvme_cqe_pending(nvmeq)) return IRQ_WAKE_THREAD; return IRQ_NONE; } static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_completion cqe; - int found = 0, consumed = 0; + u16 start, end; + bool found; - if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + if (!nvme_cqe_pending(nvmeq)) return 0; - spin_lock_irq(&nvmeq->q_lock); - while (nvme_read_cqe(nvmeq, &cqe)) { - nvme_handle_cqe(nvmeq, &cqe); - consumed++; - - if (tag == cqe.command_id) { - found = 1; - break; - } - } - - if (consumed) - nvme_ring_cq_doorbell(nvmeq); - spin_unlock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); + found = nvme_process_cq(nvmeq, &start, &end, tag); + spin_unlock_irq(&nvmeq->cq_lock); + nvme_complete_cqes(nvmeq, start, end); return found; } @@ -1052,10 +1052,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - - spin_lock_irq(&nvmeq->q_lock); - __nvme_submit_cmd(nvmeq, &c); - spin_unlock_irq(&nvmeq->q_lock); + nvme_submit_cmd(nvmeq, &c); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1070,7 +1067,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, - struct nvme_queue *nvmeq) + struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; @@ -1085,7 +1082,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); + c.create_cq.irq_vector = cpu_to_le16(vector); return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1208,7 +1205,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_warn_reset(dev, csts); nvme_dev_disable(dev, false); nvme_reset_ctrl(&dev->ctrl); - return BLK_EH_HANDLED; + return BLK_EH_DONE; } /* @@ -1218,24 +1215,24 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); - return BLK_EH_HANDLED; + return BLK_EH_DONE; } /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. All outstanding requests are completed on - * shutdown, so we return BLK_EH_HANDLED. + * shutdown, so we return BLK_EH_DONE. */ switch (dev->ctrl.state) { case NVME_CTRL_CONNECTING: case NVME_CTRL_RESETTING: - dev_warn(dev->ctrl.device, + dev_warn_ratelimited(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); nvme_req(req)->flags |= NVME_REQ_CANCELLED; - return BLK_EH_HANDLED; + return BLK_EH_DONE; default: break; } @@ -1252,12 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_dev_disable(dev, false); nvme_reset_ctrl(&dev->ctrl); - /* - * Mark the request as handled, since the inline shutdown - * forces all outstanding requests to complete. - */ nvme_req(req)->flags |= NVME_REQ_CANCELLED; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { @@ -1321,15 +1314,21 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { int vector; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); if (nvmeq->cq_vector == -1) { - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); return 1; } vector = nvmeq->cq_vector; nvmeq->dev->online_queues--; nvmeq->cq_vector = -1; - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); + + /* + * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without + * having to grab the lock. + */ + mb(); if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); @@ -1342,15 +1341,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; + u16 start, end; if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); - spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - spin_unlock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); + nvme_process_cq(nvmeq, &start, &end, -1); + spin_unlock_irq(&nvmeq->cq_lock); + + nvme_complete_cqes(nvmeq, start, end); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, @@ -1408,7 +1410,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; - spin_lock_init(&nvmeq->q_lock); + spin_lock_init(&nvmeq->sq_lock); + spin_lock_init(&nvmeq->cq_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1444,7 +1447,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) { struct nvme_dev *dev = nvmeq->dev; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; @@ -1452,13 +1455,14 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); } static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) { struct nvme_dev *dev = nvmeq->dev; int result; + s16 vector; if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth), @@ -1471,15 +1475,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; - result = adapter_alloc_cq(dev, qid, nvmeq); + vector = dev->num_vecs == 1 ? 0 : qid; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result < 0) - goto release_vector; + goto out; result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) goto release_cq; + /* + * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will + * invoke free_irq for it and cause a 'Trying to free already-free IRQ + * xxx' warning if the create CQ/SQ command times out. + */ + nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); result = queue_request_irq(nvmeq); if (result < 0) @@ -1487,13 +1497,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; - release_sq: +release_sq: + nvmeq->cq_vector = -1; dev->online_queues--; adapter_delete_sq(dev, qid); - release_cq: +release_cq: adapter_delete_cq(dev, qid); - release_vector: - nvmeq->cq_vector = -1; +out: return result; } @@ -1997,19 +2007,22 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error) static void nvme_del_cq_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; + u16 start, end; if (!error) { unsigned long flags; /* - * We might be called with the AQ q_lock held - * and the I/O queue q_lock should always + * We might be called with the AQ cq_lock held + * and the I/O queue cq_lock should always * nest inside the AQ one. */ - spin_lock_irqsave_nested(&nvmeq->q_lock, flags, + spin_lock_irqsave_nested(&nvmeq->cq_lock, flags, SINGLE_DEPTH_NESTING); - nvme_process_cq(nvmeq); - spin_unlock_irqrestore(&nvmeq->q_lock, flags); + nvme_process_cq(nvmeq, &start, &end, -1); + spin_unlock_irqrestore(&nvmeq->cq_lock, flags); + + nvme_complete_cqes(nvmeq, start, end); } nvme_del_queue_end(req, error); @@ -2497,6 +2510,15 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } +static void nvme_async_probe(void *data, async_cookie_t cookie) +{ + struct nvme_dev *dev = data; + + nvme_reset_ctrl_sync(&dev->ctrl); + flush_work(&dev->ctrl.scan_work); + nvme_put_ctrl(&dev->ctrl); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -2541,7 +2563,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); - nvme_reset_ctrl(&dev->ctrl); + nvme_get_ctrl(&dev->ctrl); + async_schedule(nvme_async_probe, dev); return 0; @@ -2685,6 +2708,9 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) static void nvme_error_resume(struct pci_dev *pdev) { + struct nvme_dev *dev = pci_get_drvdata(pdev); + + flush_work(&dev->ctrl.reset_work); pci_cleanup_aer_uncorrect_error_status(pdev); } @@ -2714,6 +2740,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_MEDIUM_PRIO_SQ }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, + { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ @@ -2728,6 +2756,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1eb4438a8763..7b3f08410430 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; + goto out_stop_queue; } ctrl->ctrl.sqsize = @@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) - goto out_cleanup_queue; + goto out_stop_queue; ctrl->ctrl.max_hw_sectors = (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_stop_queue; error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (error) - goto out_cleanup_queue; + goto out_stop_queue; return 0; +out_stop_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); @@ -1598,7 +1600,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) /* fail with DNR on cmd timeout */ nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index ea91fccd1bc0..01390f0e1671 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -148,8 +148,8 @@ TRACE_EVENT(nvme_complete_rq, __entry->flags = nvme_req(req)->flags; __entry->status = nvme_req(req)->status; ), - TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u", - __entry->cid, __entry->qid, __entry->result, + TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u", + __entry->qid, __entry->cid, __entry->result, __entry->retries, __entry->flags, __entry->status) ); diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index 488250189c99..8118c93391c6 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o -nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \ - discovery.o +nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ + discovery.o io-cmd-file.o io-cmd-bdev.o nvme-loop-y += loop.o nvmet-rdma-y += rdma.o nvmet-fc-y += fc.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 5e0e9fcc0d4d..ead8fbe6922e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -32,6 +32,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) +{ + nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); +} + static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, struct nvme_smart_log *slog) { @@ -45,6 +50,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, return NVME_SC_INVALID_NS; } + /* we don't have the right data for file backed ns */ + if (!ns->bdev) + goto out; + host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]); host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); @@ -54,6 +63,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, put_unaligned_le64(data_units_read, &slog->data_units_read[0]); put_unaligned_le64(host_writes, &slog->host_writes[0]); put_unaligned_le64(data_units_written, &slog->data_units_written[0]); +out: nvmet_put_namespace(ns); return NVME_SC_SUCCESS; @@ -71,6 +81,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, rcu_read_lock(); list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) { + /* we don't have the right data for file backed ns */ + if (!ns->bdev) + continue; host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); data_units_read += part_stat_read(ns->bdev->bd_part, sectors[READ]); @@ -89,74 +102,50 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, return NVME_SC_SUCCESS; } -static u16 nvmet_get_smart_log(struct nvmet_req *req, - struct nvme_smart_log *slog) +static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) { - u16 status; + struct nvme_smart_log *log; + u16 status = NVME_SC_INTERNAL; + + if (req->data_len != sizeof(*log)) + goto out; + + log = kzalloc(sizeof(*log), GFP_KERNEL); + if (!log) + goto out; - WARN_ON(req == NULL || slog == NULL); if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL)) - status = nvmet_get_smart_log_all(req, slog); + status = nvmet_get_smart_log_all(req, log); else - status = nvmet_get_smart_log_nsid(req, slog); - return status; + status = nvmet_get_smart_log_nsid(req, log); + if (status) + goto out; + + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); +out: + nvmet_req_complete(req, status); } -static void nvmet_execute_get_log_page(struct nvmet_req *req) +static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) { - struct nvme_smart_log *smart_log; - size_t data_len = nvmet_get_log_page_len(req->cmd); - void *buf; - u16 status = 0; + struct nvmet_ctrl *ctrl = req->sq->ctrl; + u16 status = NVME_SC_INTERNAL; + size_t len; - buf = kzalloc(data_len, GFP_KERNEL); - if (!buf) { - status = NVME_SC_INTERNAL; + if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32)) goto out; - } - switch (req->cmd->get_log_page.lid) { - case NVME_LOG_ERROR: - /* - * We currently never set the More bit in the status field, - * so all error log entries are invalid and can be zeroed out. - * This is called a minum viable implementation (TM) of this - * mandatory log page. - */ - break; - case NVME_LOG_SMART: - /* - * XXX: fill out actual smart log - * - * We might have a hard time coming up with useful values for - * many of the fields, and even when we have useful data - * available (e.g. units or commands read/written) those aren't - * persistent over power loss. - */ - if (data_len != sizeof(*smart_log)) { - status = NVME_SC_INTERNAL; - goto err; - } - smart_log = buf; - status = nvmet_get_smart_log(req, smart_log); - if (status) - goto err; - break; - case NVME_LOG_FW_SLOT: - /* - * We only support a single firmware slot which always is - * active, so we can zero out the whole firmware slot log and - * still claim to fully implement this mandatory log page. - */ - break; - default: - BUG(); - } - - status = nvmet_copy_to_sgl(req, 0, buf, data_len); - -err: - kfree(buf); + mutex_lock(&ctrl->lock); + if (ctrl->nr_changed_ns == U32_MAX) + len = sizeof(__le32); + else + len = ctrl->nr_changed_ns * sizeof(__le32); + status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len); + if (!status) + status = nvmet_zero_sgl(req, len, req->data_len - len); + ctrl->nr_changed_ns = 0; + clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked); + mutex_unlock(&ctrl->lock); out: nvmet_req_complete(req, status); } @@ -201,7 +190,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->ver = cpu_to_le32(ctrl->subsys->ver); /* XXX: figure out what to do about RTD3R/RTD3 */ - id->oaes = cpu_to_le32(1 << 8); + id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL); id->ctratt = cpu_to_le32(1 << 0); id->oacs = 0; @@ -447,6 +436,16 @@ static void nvmet_execute_set_features(struct nvmet_req *req) req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; + case NVME_FEAT_ASYNC_EVENT: + val32 = le32_to_cpu(req->cmd->common.cdw10[1]); + if (val32 & ~NVMET_AEN_CFG_ALL) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + WRITE_ONCE(req->sq->ctrl->aen_enabled, val32); + nvmet_set_result(req, val32); + break; case NVME_FEAT_HOST_ID: status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; break; @@ -485,9 +484,10 @@ static void nvmet_execute_get_features(struct nvmet_req *req) break; case NVME_FEAT_WRITE_ATOMIC: break; +#endif case NVME_FEAT_ASYNC_EVENT: + nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled)); break; -#endif case NVME_FEAT_VOLATILE_WC: nvmet_set_result(req, 1); break; @@ -548,8 +548,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) struct nvme_command *cmd = req->cmd; u16 ret; - req->ns = NULL; - ret = nvmet_check_ctrl_status(req, cmd); if (unlikely(ret)) return ret; @@ -560,9 +558,28 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) switch (cmd->get_log_page.lid) { case NVME_LOG_ERROR: + /* + * We currently never set the More bit in the status + * field, so all error log entries are invalid and can + * be zeroed out. This is called a minum viable + * implementation (TM) of this mandatory log page. + */ + req->execute = nvmet_execute_get_log_page_noop; + return 0; case NVME_LOG_SMART: + req->execute = nvmet_execute_get_log_page_smart; + return 0; case NVME_LOG_FW_SLOT: - req->execute = nvmet_execute_get_log_page; + /* + * We only support a single firmware slot which always + * is active, so we can zero out the whole firmware slot + * log and still claim to fully implement this mandatory + * log page. + */ + req->execute = nvmet_execute_get_log_page_noop; + return 0; + case NVME_LOG_CHANGED_NS: + req->execute = nvmet_execute_get_log_changed_ns; return 0; } break; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index e95424f172fd..a03da764ecae 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -57,6 +57,13 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len) return 0; } +u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len) +{ + if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) + return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; + return 0; +} + static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) { struct nvmet_ns *ns; @@ -137,6 +144,51 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, schedule_work(&ctrl->async_event_work); } +static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen) +{ + if (!(READ_ONCE(ctrl->aen_enabled) & aen)) + return true; + return test_and_set_bit(aen, &ctrl->aen_masked); +} + +static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) +{ + u32 i; + + mutex_lock(&ctrl->lock); + if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES) + goto out_unlock; + + for (i = 0; i < ctrl->nr_changed_ns; i++) { + if (ctrl->changed_ns_list[i] == nsid) + goto out_unlock; + } + + if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) { + ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff); + ctrl->nr_changed_ns = U32_MAX; + goto out_unlock; + } + + ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid; +out_unlock: + mutex_unlock(&ctrl->lock); +} + +static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) +{ + struct nvmet_ctrl *ctrl; + + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); + if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR)) + continue; + nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_NOTICE_NS_CHANGED, + NVME_LOG_CHANGED_NS); + } +} + int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) { int ret = 0; @@ -271,33 +323,31 @@ void nvmet_put_namespace(struct nvmet_ns *ns) percpu_ref_put(&ns->ref); } +static void nvmet_ns_dev_disable(struct nvmet_ns *ns) +{ + nvmet_bdev_ns_disable(ns); + nvmet_file_ns_disable(ns); +} + int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; - struct nvmet_ctrl *ctrl; int ret = 0; mutex_lock(&subsys->lock); if (ns->enabled) goto out_unlock; - ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, - NULL); - if (IS_ERR(ns->bdev)) { - pr_err("failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); - ret = PTR_ERR(ns->bdev); - ns->bdev = NULL; + ret = nvmet_bdev_ns_enable(ns); + if (ret) + ret = nvmet_file_ns_enable(ns); + if (ret) goto out_unlock; - } - - ns->size = i_size_read(ns->bdev->bd_inode); - ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL); if (ret) - goto out_blkdev_put; + goto out_dev_put; if (ns->nsid > subsys->max_nsid) subsys->max_nsid = ns->nsid; @@ -320,24 +370,20 @@ int nvmet_ns_enable(struct nvmet_ns *ns) list_add_tail_rcu(&ns->dev_link, &old->dev_link); } - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); - + nvmet_ns_changed(subsys, ns->nsid); ns->enabled = true; ret = 0; out_unlock: mutex_unlock(&subsys->lock); return ret; -out_blkdev_put: - blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); - ns->bdev = NULL; +out_dev_put: + nvmet_ns_dev_disable(ns); goto out_unlock; } void nvmet_ns_disable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; - struct nvmet_ctrl *ctrl; mutex_lock(&subsys->lock); if (!ns->enabled) @@ -363,11 +409,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns) percpu_ref_exit(&ns->ref); mutex_lock(&subsys->lock); - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); - - if (ns->bdev) - blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); + nvmet_ns_changed(subsys, ns->nsid); + nvmet_ns_dev_disable(ns); out_unlock: mutex_unlock(&subsys->lock); } @@ -499,6 +542,25 @@ int nvmet_sq_init(struct nvmet_sq *sq) } EXPORT_SYMBOL_GPL(nvmet_sq_init); +static u16 nvmet_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + u16 ret; + + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; + + req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); + if (unlikely(!req->ns)) + return NVME_SC_INVALID_NS | NVME_SC_DNR; + + if (req->ns->file) + return nvmet_file_parse_io_cmd(req); + else + return nvmet_bdev_parse_io_cmd(req); +} + bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) { @@ -710,15 +772,14 @@ out: u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) { if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); - req->ns = NULL; return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } return 0; @@ -809,12 +870,18 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, kref_init(&ctrl->ref); ctrl->subsys = subsys; + WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); + + ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, + sizeof(__le32), GFP_KERNEL); + if (!ctrl->changed_ns_list) + goto out_free_ctrl; ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *), GFP_KERNEL); if (!ctrl->cqs) - goto out_free_ctrl; + goto out_free_changed_ns_list; ctrl->sqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_sq *), @@ -872,6 +939,8 @@ out_free_sqs: kfree(ctrl->sqs); out_free_cqs: kfree(ctrl->cqs); +out_free_changed_ns_list: + kfree(ctrl->changed_ns_list); out_free_ctrl: kfree(ctrl); out_put_subsystem: @@ -898,6 +967,7 @@ static void nvmet_ctrl_free(struct kref *ref) kfree(ctrl->sqs); kfree(ctrl->cqs); + kfree(ctrl->changed_ns_list); kfree(ctrl); nvmet_subsys_put(subsys); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 231e04e0a496..08656b849bd6 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -187,8 +187,6 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while not ready\n", cmd->common.opcode); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 19e9e42ae943..d84ae004cb85 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -77,8 +77,6 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - switch (cmd->fabrics.fctype) { case nvme_fabrics_type_property_set: req->data_len = 0; @@ -242,8 +240,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - if (cmd->common.opcode != nvme_fabrics_command) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 33ee8d3145f8..408279cb6f2c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -31,7 +31,7 @@ /* *************************** Data Structures/Defines ****************** */ -#define NVMET_LS_CTX_COUNT 4 +#define NVMET_LS_CTX_COUNT 256 /* for this implementation, assume small single frame rqst/rsp */ #define NVME_FC_MAX_LS_BUFFER_SIZE 2048 diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd-bdev.c index cd2344179673..e0b0f7df70c2 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -16,6 +16,34 @@ #include <linux/module.h> #include "nvmet.h" +int nvmet_bdev_ns_enable(struct nvmet_ns *ns) +{ + int ret; + + ns->bdev = blkdev_get_by_path(ns->device_path, + FMODE_READ | FMODE_WRITE, NULL); + if (IS_ERR(ns->bdev)) { + ret = PTR_ERR(ns->bdev); + if (ret != -ENOTBLK) { + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); + } + ns->bdev = NULL; + return ret; + } + ns->size = i_size_read(ns->bdev->bd_inode); + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + return 0; +} + +void nvmet_bdev_ns_disable(struct nvmet_ns *ns) +{ + if (ns->bdev) { + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); + ns->bdev = NULL; + } +} + static void nvmet_bio_done(struct bio *bio) { struct nvmet_req *req = bio->bi_private; @@ -23,20 +51,14 @@ static void nvmet_bio_done(struct bio *bio) nvmet_req_complete(req, bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); - if (bio != &req->inline_bio) + if (bio != &req->b.inline_bio) bio_put(bio); } -static inline u32 nvmet_rw_len(struct nvmet_req *req) -{ - return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << - req->ns->blksize_shift; -} - -static void nvmet_execute_rw(struct nvmet_req *req) +static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; - struct bio *bio = &req->inline_bio; + struct bio *bio = &req->b.inline_bio; struct scatterlist *sg; sector_t sector; blk_qc_t cookie; @@ -89,9 +111,9 @@ static void nvmet_execute_rw(struct nvmet_req *req) blk_poll(bdev_get_queue(req->ns->bdev), cookie); } -static void nvmet_execute_flush(struct nvmet_req *req) +static void nvmet_bdev_execute_flush(struct nvmet_req *req) { - struct bio *bio = &req->inline_bio; + struct bio *bio = &req->b.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); bio_set_dev(bio, req->ns->bdev); @@ -102,7 +124,7 @@ static void nvmet_execute_flush(struct nvmet_req *req) submit_bio(bio); } -static u16 nvmet_discard_range(struct nvmet_ns *ns, +static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns, struct nvme_dsm_range *range, struct bio **bio) { int ret; @@ -116,7 +138,7 @@ static u16 nvmet_discard_range(struct nvmet_ns *ns, return 0; } -static void nvmet_execute_discard(struct nvmet_req *req) +static void nvmet_bdev_execute_discard(struct nvmet_req *req) { struct nvme_dsm_range range; struct bio *bio = NULL; @@ -129,7 +151,7 @@ static void nvmet_execute_discard(struct nvmet_req *req) if (status) break; - status = nvmet_discard_range(req->ns, &range, &bio); + status = nvmet_bdev_discard_range(req->ns, &range, &bio); if (status) break; } @@ -148,11 +170,11 @@ static void nvmet_execute_discard(struct nvmet_req *req) } } -static void nvmet_execute_dsm(struct nvmet_req *req) +static void nvmet_bdev_execute_dsm(struct nvmet_req *req) { switch (le32_to_cpu(req->cmd->dsm.attributes)) { case NVME_DSMGMT_AD: - nvmet_execute_discard(req); + nvmet_bdev_execute_discard(req); return; case NVME_DSMGMT_IDR: case NVME_DSMGMT_IDW: @@ -163,7 +185,7 @@ static void nvmet_execute_dsm(struct nvmet_req *req) } } -static void nvmet_execute_write_zeroes(struct nvmet_req *req) +static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) { struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; struct bio *bio = NULL; @@ -189,38 +211,27 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) } } -u16 nvmet_parse_io_cmd(struct nvmet_req *req) +u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - u16 ret; - - ret = nvmet_check_ctrl_status(req, cmd); - if (unlikely(ret)) { - req->ns = NULL; - return ret; - } - - req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (unlikely(!req->ns)) - return NVME_SC_INVALID_NS | NVME_SC_DNR; switch (cmd->common.opcode) { case nvme_cmd_read: case nvme_cmd_write: - req->execute = nvmet_execute_rw; + req->execute = nvmet_bdev_execute_rw; req->data_len = nvmet_rw_len(req); return 0; case nvme_cmd_flush: - req->execute = nvmet_execute_flush; + req->execute = nvmet_bdev_execute_flush; req->data_len = 0; return 0; case nvme_cmd_dsm: - req->execute = nvmet_execute_dsm; + req->execute = nvmet_bdev_execute_dsm; req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * sizeof(struct nvme_dsm_range); return 0; case nvme_cmd_write_zeroes: - req->execute = nvmet_execute_write_zeroes; + req->execute = nvmet_bdev_execute_write_zeroes; return 0; default: pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c new file mode 100644 index 000000000000..8c42b3a8c420 --- /dev/null +++ b/drivers/nvme/target/io-cmd-file.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe Over Fabrics Target File I/O commands implementation. + * Copyright (c) 2017-2018 Western Digital Corporation or its + * affiliates. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/uio.h> +#include <linux/falloc.h> +#include <linux/file.h> +#include "nvmet.h" + +#define NVMET_MAX_MPOOL_BVEC 16 +#define NVMET_MIN_MPOOL_OBJ 16 + +void nvmet_file_ns_disable(struct nvmet_ns *ns) +{ + if (ns->file) { + mempool_destroy(ns->bvec_pool); + ns->bvec_pool = NULL; + kmem_cache_destroy(ns->bvec_cache); + ns->bvec_cache = NULL; + fput(ns->file); + ns->file = NULL; + } +} + +int nvmet_file_ns_enable(struct nvmet_ns *ns) +{ + int ret; + struct kstat stat; + + ns->file = filp_open(ns->device_path, + O_RDWR | O_LARGEFILE | O_DIRECT, 0); + if (IS_ERR(ns->file)) { + pr_err("failed to open file %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->file)); + return PTR_ERR(ns->file); + } + + ret = vfs_getattr(&ns->file->f_path, + &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); + if (ret) + goto err; + + ns->size = stat.size; + ns->blksize_shift = file_inode(ns->file)->i_blkbits; + + ns->bvec_cache = kmem_cache_create("nvmet-bvec", + NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ns->bvec_cache) { + ret = -ENOMEM; + goto err; + } + + ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, + mempool_free_slab, ns->bvec_cache); + + if (!ns->bvec_pool) { + ret = -ENOMEM; + goto err; + } + + return ret; +err: + ns->size = 0; + ns->blksize_shift = 0; + nvmet_file_ns_disable(ns); + return ret; +} + +static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) +{ + bv->bv_page = sg_page_iter_page(iter); + bv->bv_offset = iter->sg->offset; + bv->bv_len = PAGE_SIZE - iter->sg->offset; +} + +static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, + unsigned long nr_segs, size_t count) +{ + struct kiocb *iocb = &req->f.iocb; + ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter); + struct iov_iter iter; + int ki_flags = 0, rw; + ssize_t ret; + + if (req->cmd->rw.opcode == nvme_cmd_write) { + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) + ki_flags = IOCB_DSYNC; + call_iter = req->ns->file->f_op->write_iter; + rw = WRITE; + } else { + call_iter = req->ns->file->f_op->read_iter; + rw = READ; + } + + iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count); + + iocb->ki_pos = pos; + iocb->ki_filp = req->ns->file; + iocb->ki_flags = IOCB_DIRECT | ki_flags; + + ret = call_iter(iocb, &iter); + + if (ret != -EIOCBQUEUED && iocb->ki_complete) + iocb->ki_complete(iocb, ret, 0); + + return ret; +} + +static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) +{ + struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb); + + if (req->f.bvec != req->inline_bvec) { + if (likely(req->f.mpool_alloc == false)) + kfree(req->f.bvec); + else + mempool_free(req->f.bvec, req->ns->bvec_pool); + } + + nvmet_req_complete(req, ret != req->data_len ? + NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_rw(struct nvmet_req *req) +{ + ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + struct sg_page_iter sg_pg_iter; + unsigned long bv_cnt = 0; + bool is_sync = false; + size_t len = 0, total_len = 0; + ssize_t ret = 0; + loff_t pos; + + if (!req->sg_cnt || !nr_bvec) { + nvmet_req_complete(req, 0); + return; + } + + if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) + req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_KERNEL); + else + req->f.bvec = req->inline_bvec; + + req->f.mpool_alloc = false; + if (unlikely(!req->f.bvec)) { + /* fallback under memory pressure */ + req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); + req->f.mpool_alloc = true; + if (nr_bvec > NVMET_MAX_MPOOL_BVEC) + is_sync = true; + } + + pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; + + memset(&req->f.iocb, 0, sizeof(struct kiocb)); + for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { + nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); + len += req->f.bvec[bv_cnt].bv_len; + total_len += req->f.bvec[bv_cnt].bv_len; + bv_cnt++; + + WARN_ON_ONCE((nr_bvec - 1) < 0); + + if (unlikely(is_sync) && + (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) { + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len); + if (ret < 0) + goto out; + pos += len; + bv_cnt = 0; + len = 0; + } + nr_bvec--; + } + + if (WARN_ON_ONCE(total_len != req->data_len)) + ret = -EIO; +out: + if (unlikely(is_sync || ret)) { + nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0); + return; + } + req->f.iocb.ki_complete = nvmet_file_io_done; + nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); +} + +static void nvmet_file_flush_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + int ret; + + ret = vfs_fsync(req->ns->file, 1); + + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_flush(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_flush_work); + schedule_work(&req->f.work); +} + +static void nvmet_file_execute_discard(struct nvmet_req *req) +{ + int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + struct nvme_dsm_range range; + loff_t offset; + loff_t len; + int i, ret; + + for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { + if (nvmet_copy_from_sgl(req, i * sizeof(range), &range, + sizeof(range))) + break; + offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + ret = vfs_fallocate(req->ns->file, mode, offset, len); + if (ret) + break; + } + + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_dsm_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + + switch (le32_to_cpu(req->cmd->dsm.attributes)) { + case NVME_DSMGMT_AD: + nvmet_file_execute_discard(req); + return; + case NVME_DSMGMT_IDR: + case NVME_DSMGMT_IDW: + default: + /* Not supported yet */ + nvmet_req_complete(req, 0); + return; + } +} + +static void nvmet_file_execute_dsm(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_dsm_work); + schedule_work(&req->f.work); +} + +static void nvmet_file_write_zeroes_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; + int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE; + loff_t offset; + loff_t len; + int ret; + + offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift; + len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << + req->ns->blksize_shift); + + ret = vfs_fallocate(req->ns->file, mode, offset, len); + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); + schedule_work(&req->f.work); +} + +u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + + switch (cmd->common.opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + req->execute = nvmet_file_execute_rw; + req->data_len = nvmet_rw_len(req); + return 0; + case nvme_cmd_flush: + req->execute = nvmet_file_execute_flush; + req->data_len = 0; + return 0; + case nvme_cmd_dsm: + req->execute = nvmet_file_execute_dsm; + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * + sizeof(struct nvme_dsm_range); + return 0; + case nvme_cmd_write_zeroes: + req->execute = nvmet_file_execute_write_zeroes; + req->data_len = 0; + return 0; + default: + pr_err("unhandled cmd for file ns %d on qid %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } +} diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 27a8561c0cb9..1304ec3a7ede 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -45,6 +45,7 @@ struct nvme_loop_ctrl { struct nvme_ctrl ctrl; struct nvmet_ctrl *target_ctrl; + struct nvmet_port *port; }; static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) @@ -63,7 +64,8 @@ struct nvme_loop_queue { unsigned long flags; }; -static struct nvmet_port *nvmet_loop_port; +static LIST_HEAD(nvme_loop_ports); +static DEFINE_MUTEX(nvme_loop_ports_mutex); static LIST_HEAD(nvme_loop_ctrl_list); static DEFINE_MUTEX(nvme_loop_ctrl_mutex); @@ -146,7 +148,7 @@ nvme_loop_timeout(struct request *rq, bool reserved) /* fail with DNR on admin cmd timeout */ nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -169,12 +171,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; - iod->req.port = nvmet_loop_port; + iod->req.port = queue->ctrl->port; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; - if (blk_rq_payload_bytes(req)) { + if (blk_rq_nr_phys_segments(req)) { iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), @@ -517,6 +519,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .free_ctrl = nvme_loop_free_ctrl, .submit_async_event = nvme_loop_submit_async_event, .delete_ctrl = nvme_loop_delete_ctrl_host, + .get_address = nvmf_get_address, }; static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) @@ -565,6 +568,23 @@ out_destroy_queues: return ret; } +static struct nvmet_port *nvme_loop_find_port(struct nvme_ctrl *ctrl) +{ + struct nvmet_port *p, *found = NULL; + + mutex_lock(&nvme_loop_ports_mutex); + list_for_each_entry(p, &nvme_loop_ports, entry) { + /* if no transport address is specified use the first port */ + if ((ctrl->opts->mask & NVMF_OPT_TRADDR) && + strcmp(ctrl->opts->traddr, p->disc_addr.traddr)) + continue; + found = p; + break; + } + mutex_unlock(&nvme_loop_ports_mutex); + return found; +} + static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -589,6 +609,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; + ctrl->port = nvme_loop_find_port(&ctrl->ctrl); ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues), GFP_KERNEL); @@ -646,27 +667,17 @@ out_put_ctrl: static int nvme_loop_add_port(struct nvmet_port *port) { - /* - * XXX: disalow adding more than one port so - * there is no connection rejections when a - * a subsystem is assigned to a port for which - * loop doesn't have a pointer. - * This scenario would be possible if we allowed - * more than one port to be added and a subsystem - * was assigned to a port other than nvmet_loop_port. - */ - - if (nvmet_loop_port) - return -EPERM; - - nvmet_loop_port = port; + mutex_lock(&nvme_loop_ports_mutex); + list_add_tail(&port->entry, &nvme_loop_ports); + mutex_unlock(&nvme_loop_ports_mutex); return 0; } static void nvme_loop_remove_port(struct nvmet_port *port) { - if (port == nvmet_loop_port) - nvmet_loop_port = NULL; + mutex_lock(&nvme_loop_ports_mutex); + list_del_init(&port->entry); + mutex_unlock(&nvme_loop_ports_mutex); } static const struct nvmet_fabrics_ops nvme_loop_ops = { @@ -682,6 +693,7 @@ static struct nvmf_transport_ops nvme_loop_transport = { .name = "loop", .module = THIS_MODULE, .create_ctrl = nvme_loop_create_ctrl, + .allowed_opts = NVMF_OPT_TRADDR, }; static int __init nvme_loop_init_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 15fd84ab21f8..480dfe10fad9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -30,6 +30,21 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 + +/* + * Supported optional AENs: + */ +#define NVMET_AEN_CFG_OPTIONAL \ + NVME_AEN_CFG_NS_ATTR + +/* + * Plus mandatory SMART AENs (we'll never send them, but allow enabling them): + */ +#define NVMET_AEN_CFG_ALL \ + (NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \ + NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \ + NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL) + /* Helper Macros when NVMe error is NVME_SC_CONNECT_INVALID_PARAM * The 16 bit shift is to set IATTR bit to 1, which means offending * offset starts in the data section of connect() @@ -43,6 +58,7 @@ struct nvmet_ns { struct list_head dev_link; struct percpu_ref ref; struct block_device *bdev; + struct file *file; u32 nsid; u32 blksize_shift; loff_t size; @@ -57,6 +73,8 @@ struct nvmet_ns { struct config_group group; struct completion disable_done; + mempool_t *bvec_pool; + struct kmem_cache *bvec_cache; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) @@ -82,7 +100,7 @@ struct nvmet_sq { /** * struct nvmet_port - Common structure to keep port * information for the target. - * @entry: List head for holding a list of these elements. + * @entry: Entry into referrals or transport list. * @disc_addr: Address information is stored in a format defined * for a discovery log page entry. * @group: ConfigFS group for this element's folder. @@ -120,6 +138,8 @@ struct nvmet_ctrl { u16 cntlid; u32 kato; + u32 aen_enabled; + unsigned long aen_masked; struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS]; unsigned int nr_async_event_cmds; struct list_head async_events; @@ -132,6 +152,9 @@ struct nvmet_ctrl { const struct nvmet_fabrics_ops *ops; + __le32 *changed_ns_list; + u32 nr_changed_ns; + char subsysnqn[NVMF_NQN_FIELD_LEN]; char hostnqn[NVMF_NQN_FIELD_LEN]; }; @@ -222,8 +245,18 @@ struct nvmet_req { struct nvmet_cq *cq; struct nvmet_ns *ns; struct scatterlist *sg; - struct bio inline_bio; struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; + union { + struct { + struct bio inline_bio; + } b; + struct { + bool mpool_alloc; + struct kiocb iocb; + struct bio_vec *bvec; + struct work_struct work; + } f; + }; int sg_cnt; /* data length as parsed from the command: */ size_t data_len; @@ -263,7 +296,8 @@ struct nvmet_async_event { }; u16 nvmet_parse_connect_cmd(struct nvmet_req *req); -u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); u16 nvmet_parse_admin_cmd(struct nvmet_req *req); u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); @@ -316,6 +350,7 @@ u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, size_t len); u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len); +u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); @@ -338,4 +373,14 @@ extern struct rw_semaphore nvmet_config_sem; bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, const char *hostnqn); +int nvmet_bdev_ns_enable(struct nvmet_ns *ns); +int nvmet_file_ns_enable(struct nvmet_ns *ns); +void nvmet_bdev_ns_disable(struct nvmet_ns *ns); +void nvmet_file_ns_disable(struct nvmet_ns *ns); + +static inline u32 nvmet_rw_len(struct nvmet_req *req) +{ + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << + req->ns->blksize_shift; +} #endif /* _NVMET_H */ diff --git a/drivers/of/device.c b/drivers/of/device.c index 064c818105bd..33d85511d790 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -76,6 +76,8 @@ int of_device_add(struct platform_device *ofdev) * of_dma_configure - Setup DMA configuration * @dev: Device to apply DMA configuration * @np: Pointer to OF node having DMA configuration + * @force_dma: Whether device is to be set up by of_dma_configure() even if + * DMA capability is not explicitly described by firmware. * * Try to get devices's DMA configuration from DT and update it * accordingly. @@ -84,7 +86,7 @@ int of_device_add(struct platform_device *ofdev) * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events * to fix up DMA configuration. */ -int of_dma_configure(struct device *dev, struct device_node *np) +int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma) { u64 dma_addr, paddr, size = 0; int ret; @@ -100,7 +102,7 @@ int of_dma_configure(struct device *dev, struct device_node *np) * DMA configuration regardless of whether "dma-ranges" is * correctly specified or not. */ - if (!dev->bus->force_dma) + if (!force_dma) return ret == -ENODEV ? 0 : ret; dma_addr = offset = 0; diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 9a4f4246231d..895c83e0c7b6 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -353,7 +353,7 @@ int of_reserved_mem_device_init_by_idx(struct device *dev, /* ensure that dma_ops is set for virtual devices * using reserved memory */ - of_dma_configure(dev, np); + of_dma_configure(dev, np, true); dev_info(dev, "assigned reserved memory node %s\n", rmem->name); } else { diff --git a/drivers/parisc/Kconfig b/drivers/parisc/Kconfig index 3a102a84d637..5a48b5606110 100644 --- a/drivers/parisc/Kconfig +++ b/drivers/parisc/Kconfig @@ -103,11 +103,6 @@ config IOMMU_SBA depends on PCI_LBA default PCI_LBA -config IOMMU_HELPER - bool - depends on IOMMU_SBA || IOMMU_CCIO - default y - source "drivers/pcmcia/Kconfig" endmenu diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 297599fcbc32..614823617b8b 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1108,19 +1108,6 @@ static int ccio_proc_info(struct seq_file *m, void *p) return 0; } -static int ccio_proc_info_open(struct inode *inode, struct file *file) -{ - return single_open(file, &ccio_proc_info, NULL); -} - -static const struct file_operations ccio_proc_info_fops = { - .owner = THIS_MODULE, - .open = ccio_proc_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ccio_proc_bitmap_info(struct seq_file *m, void *p) { struct ioc *ioc = ioc_list; @@ -1135,19 +1122,6 @@ static int ccio_proc_bitmap_info(struct seq_file *m, void *p) return 0; } - -static int ccio_proc_bitmap_open(struct inode *inode, struct file *file) -{ - return single_open(file, &ccio_proc_bitmap_info, NULL); -} - -static const struct file_operations ccio_proc_bitmap_fops = { - .owner = THIS_MODULE, - .open = ccio_proc_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ /** @@ -1589,15 +1563,13 @@ static int __init ccio_probe(struct parisc_device *dev) #ifdef CONFIG_PROC_FS if (ioc_count == 0) { - proc_create(MODULE_NAME, 0, proc_runway_root, - &ccio_proc_info_fops); - proc_create(MODULE_NAME"-bitmap", 0, proc_runway_root, - &ccio_proc_bitmap_fops); + proc_create_single(MODULE_NAME, 0, proc_runway_root, + ccio_proc_info); + proc_create_single(MODULE_NAME"-bitmap", 0, proc_runway_root, + ccio_proc_bitmap_info); } #endif ioc_count++; - - parisc_has_iommu(); return 0; } diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 0a9c762a70fa..11de0eccf968 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1864,20 +1864,6 @@ static int sba_proc_info(struct seq_file *m, void *p) } static int -sba_proc_open(struct inode *i, struct file *f) -{ - return single_open(f, &sba_proc_info, NULL); -} - -static const struct file_operations sba_proc_fops = { - .owner = THIS_MODULE, - .open = sba_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int sba_proc_bitmap_info(struct seq_file *m, void *p) { struct sba_device *sba_dev = sba_list; @@ -1889,20 +1875,6 @@ sba_proc_bitmap_info(struct seq_file *m, void *p) return 0; } - -static int -sba_proc_bitmap_open(struct inode *i, struct file *f) -{ - return single_open(f, &sba_proc_bitmap_info, NULL); -} - -static const struct file_operations sba_proc_bitmap_fops = { - .owner = THIS_MODULE, - .open = sba_proc_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ static const struct parisc_device_id sba_tbl[] __initconst = { @@ -2014,11 +1986,9 @@ static int __init sba_driver_callback(struct parisc_device *dev) break; } - proc_create("sba_iommu", 0, root, &sba_proc_fops); - proc_create("sba_iommu-bitmap", 0, root, &sba_proc_bitmap_fops); + proc_create_single("sba_iommu", 0, root, sba_proc_info); + proc_create_single("sba_iommu-bitmap", 0, root, sba_proc_bitmap_info); #endif - - parisc_has_iommu(); return 0; } diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 34b56a8f8480..29a487f31dae 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -5,10 +5,6 @@ source "drivers/pci/pcie/Kconfig" -config PCI_BUS_ADDR_T_64BIT - def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) - depends on PCI - config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" depends on PCI diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index bc2ded4c451f..35b7fc87eac5 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -120,7 +120,7 @@ int devm_request_pci_bus_resources(struct device *dev, EXPORT_SYMBOL_GPL(devm_request_pci_bus_resources); static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL}; -#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT static struct pci_bus_region pci_64_bit = {0, (pci_bus_addr_t) 0xffffffffffffffffULL}; static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL, @@ -230,7 +230,7 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t), void *alignf_data) { -#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT int rc; if (res->flags & IORESOURCE_MEM_64) { diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index b9a131137e64..f8269a725667 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -16,6 +16,8 @@ #include <linux/pm_runtime.h> #include <linux/suspend.h> #include <linux/kexec.h> +#include <linux/of_device.h> +#include <linux/acpi.h> #include "pci.h" #include "pcie/portdrv.h" @@ -1577,6 +1579,35 @@ static int pci_bus_num_vf(struct device *dev) return pci_num_vf(to_pci_dev(dev)); } +/** + * pci_dma_configure - Setup DMA configuration + * @dev: ptr to dev structure + * + * Function to update PCI devices's DMA configuration using the same + * info from the OF node or ACPI node of host bridge's parent (if any). + */ +static int pci_dma_configure(struct device *dev) +{ + struct device *bridge; + int ret = 0; + + bridge = pci_get_host_bridge_device(to_pci_dev(dev)); + + if (IS_ENABLED(CONFIG_OF) && bridge->parent && + bridge->parent->of_node) { + ret = of_dma_configure(dev, bridge->parent->of_node, true); + } else if (has_acpi_companion(bridge)) { + struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); + enum dev_dma_attr attr = acpi_get_dma_attr(adev); + + if (attr != DEV_DMA_NOT_SUPPORTED) + ret = acpi_dma_configure(dev, attr); + } + + pci_put_host_bridge_device(bridge); + return ret; +} + struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, @@ -1589,7 +1620,7 @@ struct bus_type pci_bus_type = { .drv_groups = pci_drv_groups, .pm = PCI_PM_OPS_PTR, .num_vf = pci_bus_num_vf, - .force_dma = true, + .dma_configure = pci_dma_configure, }; EXPORT_SYMBOL(pci_bus_type); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 1ee8927a0635..7ac035af39f0 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -435,25 +435,12 @@ int pci_proc_detach_bus(struct pci_bus *bus) return 0; } -static int proc_bus_pci_dev_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_bus_pci_devices_op); -} - -static const struct file_operations proc_bus_pci_dev_operations = { - .owner = THIS_MODULE, - .open = proc_bus_pci_dev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init pci_proc_init(void) { struct pci_dev *dev = NULL; proc_bus_pci_dir = proc_mkdir("bus/pci", NULL); - proc_create("devices", 0, proc_bus_pci_dir, - &proc_bus_pci_dev_operations); + proc_create_seq("devices", 0, proc_bus_pci_dir, + &proc_bus_pci_devices_op); proc_initialized = 1; for_each_pci_dev(dev) pci_proc_attach_device(dev); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index e7bbdf947bbc..8350ca2311c7 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -91,6 +91,8 @@ static int send_command(struct cros_ec_device *ec_dev, usleep_range(10000, 11000); ret = (*xfer_fxn)(ec_dev, status_msg); + if (ret == -EAGAIN) + continue; if (ret < 0) break; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index a32c5c00e0e7..ffffb9909ae1 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -163,6 +163,16 @@ MODULE_LICENSE("GPL"); static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; +static bool ashs_present(void) +{ + int i = 0; + while (ashs_ids[i]) { + if (acpi_dev_found(ashs_ids[i++])) + return true; + } + return false; +} + struct bios_args { u32 arg0; u32 arg1; @@ -1025,6 +1035,9 @@ static int asus_new_rfkill(struct asus_wmi *asus, static void asus_wmi_rfkill_exit(struct asus_wmi *asus) { + if (asus->driver->wlan_ctrl_by_user && ashs_present()) + return; + asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P5"); asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P6"); asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P7"); @@ -2121,16 +2134,6 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } -static bool ashs_present(void) -{ - int i = 0; - while (ashs_ids[i]) { - if (acpi_dev_found(ashs_ids[i++])) - return true; - } - return false; -} - /* * WMI Driver */ diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index e8d058c5ef21..eef76bfa5d73 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1689,19 +1689,6 @@ static int version_proc_show(struct seq_file *m, void *v) return 0; } -static int version_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, version_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations version_proc_fops = { - .owner = THIS_MODULE, - .open = version_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * Proc and module init */ @@ -1722,8 +1709,8 @@ static void create_toshiba_proc_entries(struct toshiba_acpi_dev *dev) if (dev->hotkey_dev) proc_create_data("keys", S_IRUGO | S_IWUSR, toshiba_proc_dir, &keys_proc_fops, dev); - proc_create_data("version", S_IRUGO, toshiba_proc_dir, - &version_proc_fops, dev); + proc_create_single_data("version", S_IRUGO, toshiba_proc_dir, + version_proc_show, dev); } static void remove_toshiba_proc_entries(struct toshiba_acpi_dev *dev) diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index 7d4aca7948dd..fe1c8f5d9af0 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -47,19 +47,6 @@ static int pnpconfig_proc_show(struct seq_file *m, void *v) return 0; } -static int pnpconfig_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pnpconfig_proc_show, NULL); -} - -static const struct file_operations pnpconfig_proc_fops = { - .owner = THIS_MODULE, - .open = pnpconfig_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int escd_info_proc_show(struct seq_file *m, void *v) { struct escd_info_struc escd; @@ -74,19 +61,6 @@ static int escd_info_proc_show(struct seq_file *m, void *v) return 0; } -static int escd_info_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, escd_info_proc_show, NULL); -} - -static const struct file_operations escd_info_proc_fops = { - .owner = THIS_MODULE, - .open = escd_info_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #define MAX_SANE_ESCD_SIZE (32*1024) static int escd_proc_show(struct seq_file *m, void *v) { @@ -129,19 +103,6 @@ static int escd_proc_show(struct seq_file *m, void *v) return 0; } -static int escd_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, escd_proc_show, NULL); -} - -static const struct file_operations escd_proc_fops = { - .owner = THIS_MODULE, - .open = escd_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int pnp_legacyres_proc_show(struct seq_file *m, void *v) { void *buf; @@ -159,19 +120,6 @@ static int pnp_legacyres_proc_show(struct seq_file *m, void *v) return 0; } -static int pnp_legacyres_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pnp_legacyres_proc_show, NULL); -} - -static const struct file_operations pnp_legacyres_proc_fops = { - .owner = THIS_MODULE, - .open = pnp_legacyres_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int pnp_devices_proc_show(struct seq_file *m, void *v) { struct pnp_bios_node *node; @@ -202,19 +150,6 @@ static int pnp_devices_proc_show(struct seq_file *m, void *v) return 0; } -static int pnp_devices_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pnp_devices_proc_show, NULL); -} - -static const struct file_operations pnp_devices_proc_fops = { - .owner = THIS_MODULE, - .open = pnp_devices_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int pnpbios_proc_show(struct seq_file *m, void *v) { void *data = m->private; @@ -318,12 +253,13 @@ int __init pnpbios_proc_init(void) proc_pnp_boot = proc_mkdir("boot", proc_pnp); if (!proc_pnp_boot) return -EIO; - proc_create("devices", 0, proc_pnp, &pnp_devices_proc_fops); - proc_create("configuration_info", 0, proc_pnp, &pnpconfig_proc_fops); - proc_create("escd_info", 0, proc_pnp, &escd_info_proc_fops); - proc_create("escd", S_IRUSR, proc_pnp, &escd_proc_fops); - proc_create("legacy_device_resources", 0, proc_pnp, &pnp_legacyres_proc_fops); - + proc_create_single("devices", 0, proc_pnp, pnp_devices_proc_show); + proc_create_single("configuration_info", 0, proc_pnp, + pnpconfig_proc_show); + proc_create_single("escd_info", 0, proc_pnp, escd_info_proc_show); + proc_create_single("escd", S_IRUSR, proc_pnp, escd_proc_show); + proc_create_single("legacy_device_resources", 0, proc_pnp, + pnp_legacyres_proc_show); return 0; } diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c index 31e7e23cc5be..a9dd9218fae2 100644 --- a/drivers/rtc/rtc-proc.c +++ b/drivers/rtc/rtc-proc.c @@ -107,40 +107,11 @@ static int rtc_proc_show(struct seq_file *seq, void *offset) return 0; } -static int rtc_proc_open(struct inode *inode, struct file *file) -{ - int ret; - struct rtc_device *rtc = PDE_DATA(inode); - - if (!try_module_get(rtc->owner)) - return -ENODEV; - - ret = single_open(file, rtc_proc_show, rtc); - if (ret) - module_put(rtc->owner); - return ret; -} - -static int rtc_proc_release(struct inode *inode, struct file *file) -{ - int res = single_release(inode, file); - struct rtc_device *rtc = PDE_DATA(inode); - - module_put(rtc->owner); - return res; -} - -static const struct file_operations rtc_proc_fops = { - .open = rtc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = rtc_proc_release, -}; - void rtc_proc_add_device(struct rtc_device *rtc) { if (is_rtc_hctosys(rtc)) - proc_create_data("driver/rtc", 0, NULL, &rtc_proc_fops, rtc); + proc_create_single_data("driver/rtc", 0, NULL, rtc_proc_show, + rtc); } void rtc_proc_del_device(struct rtc_device *rtc) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 04143c08bd6e..cb9b685118da 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3034,7 +3034,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, cqr->callback_data = req; cqr->status = DASD_CQR_FILLED; cqr->dq = dq; - req->completion_data = cqr; + *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr; + blk_mq_start_request(req); spin_lock(&block->queue_lock); list_add_tail(&cqr->blocklist, &block->ccw_queue); @@ -3053,19 +3054,20 @@ out: * * Return values: * BLK_EH_RESET_TIMER if the request should be left running - * BLK_EH_NOT_HANDLED if the request is handled or terminated + * BLK_EH_DONE if the request is handled or terminated * by the driver. */ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) { - struct dasd_ccw_req *cqr = req->completion_data; struct dasd_block *block = req->q->queuedata; struct dasd_device *device; + struct dasd_ccw_req *cqr; unsigned long flags; int rc = 0; + cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)); if (!cqr) - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; spin_lock_irqsave(&cqr->dq->lock, flags); device = cqr->startdev ? cqr->startdev : block->base; @@ -3124,7 +3126,7 @@ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) spin_unlock(&block->queue_lock); spin_unlock_irqrestore(&cqr->dq->lock, flags); - return rc ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; + return rc ? BLK_EH_RESET_TIMER : BLK_EH_DONE; } static int dasd_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -3169,6 +3171,7 @@ static int dasd_alloc_queue(struct dasd_block *block) int rc; block->tag_set.ops = &dasd_mq_ops; + block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *); block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index c33788a829c3..5cb80c645489 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -131,19 +131,6 @@ static const struct seq_operations dasd_devices_seq_ops = { .show = dasd_devices_show, }; -static int dasd_devices_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &dasd_devices_seq_ops); -} - -static const struct file_operations dasd_devices_file_ops = { - .owner = THIS_MODULE, - .open = dasd_devices_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #ifdef CONFIG_DASD_PROFILE static int dasd_stats_all_block_on(void) { @@ -352,10 +339,10 @@ dasd_proc_init(void) dasd_proc_root_entry = proc_mkdir("dasd", NULL); if (!dasd_proc_root_entry) goto out_nodasd; - dasd_devices_entry = proc_create("devices", + dasd_devices_entry = proc_create_seq("devices", S_IFREG | S_IRUGO | S_IWUSR, dasd_proc_root_entry, - &dasd_devices_file_ops); + &dasd_devices_seq_ops); if (!dasd_devices_entry) goto out_nodevices; dasd_statistics_entry = proc_create("statistics", diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c index faae30476f4b..32a14ee31c6b 100644 --- a/drivers/s390/char/tape_proc.c +++ b/drivers/s390/char/tape_proc.c @@ -105,29 +105,14 @@ static const struct seq_operations tape_proc_seq = { .show = tape_proc_show, }; -static int tape_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tape_proc_seq); -} - -static const struct file_operations tape_proc_ops = -{ - .owner = THIS_MODULE, - .open = tape_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * Initialize procfs stuff on startup */ void tape_proc_init(void) { - tape_proc_devices = - proc_create("tapedevices", S_IFREG | S_IRUGO | S_IWUSR, NULL, - &tape_proc_ops); + tape_proc_devices = proc_create_seq("tapedevices", + S_IFREG | S_IRUGO | S_IWUSR, NULL, &tape_proc_seq); if (tape_proc_devices == NULL) { return; } diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index a8b831000b2d..18c4f933e8b9 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -4,7 +4,7 @@ * * Debug traces for zfcp. * - * Copyright IBM Corp. 2002, 2017 + * Copyright IBM Corp. 2002, 2018 */ #define KMSG_COMPONENT "zfcp" @@ -308,6 +308,27 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, spin_unlock_irqrestore(&dbf->rec_lock, flags); } +/** + * zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock + * @tag: identifier for event + * @adapter: adapter on which the erp_action should run + * @port: remote port involved in the erp_action + * @sdev: scsi device involved in the erp_action + * @want: wanted erp_action + * @need: required erp_action + * + * The adapter->erp_lock must not be held. + */ +void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter, + struct zfcp_port *port, struct scsi_device *sdev, + u8 want, u8 need) +{ + unsigned long flags; + + read_lock_irqsave(&adapter->erp_lock, flags); + zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need); + read_unlock_irqrestore(&adapter->erp_lock, flags); +} /** * zfcp_dbf_rec_run_lvl - trace event related to running recovery diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index bf8ea4df2bb8..e5eed8aac0ce 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -4,7 +4,7 @@ * * External function declarations. * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2018 */ #ifndef ZFCP_EXT_H @@ -35,6 +35,9 @@ extern int zfcp_dbf_adapter_register(struct zfcp_adapter *); extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, struct zfcp_port *, struct scsi_device *, u8, u8); +extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter, + struct zfcp_port *port, + struct scsi_device *sdev, u8 want, u8 need); extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); extern void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 4d2ba5682493..22f9562f415c 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -4,7 +4,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2017 + * Copyright IBM Corp. 2002, 2018 */ #define KMSG_COMPONENT "zfcp" @@ -618,9 +618,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port) ids.port_id = port->d_id; ids.roles = FC_RPORT_ROLE_FCP_TARGET; - zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL, - ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD, - ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD); + zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL, + ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD, + ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD); rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids); if (!rport) { dev_err(&port->adapter->ccw_device->dev, @@ -642,9 +642,9 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port) struct fc_rport *rport = port->rport; if (rport) { - zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL, - ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL, - ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL); + zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL, + ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL, + ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL); fc_remote_port_delete(rport); port->rport = NULL; } diff --git a/drivers/sbus/char/Kconfig b/drivers/sbus/char/Kconfig index bf3c5f735614..89edd13fd572 100644 --- a/drivers/sbus/char/Kconfig +++ b/drivers/sbus/char/Kconfig @@ -28,13 +28,6 @@ config TADPOLE_TS102_UCTRL events, and can also notice the attachment/detachment of external monitors and mice. -config SUN_JSFLASH - tristate "JavaStation OS Flash SIMM" - depends on SPARC32 - help - If you say Y here, you will be able to boot from your JavaStation's - Flash memory. - config BBC_I2C tristate "UltraSPARC-III bootbus i2c controller driver" depends on PCI && SPARC64 diff --git a/drivers/sbus/char/Makefile b/drivers/sbus/char/Makefile index 8c48ed96683f..44347c918f6b 100644 --- a/drivers/sbus/char/Makefile +++ b/drivers/sbus/char/Makefile @@ -15,6 +15,5 @@ obj-$(CONFIG_DISPLAY7SEG) += display7seg.o obj-$(CONFIG_OBP_FLASH) += flash.o obj-$(CONFIG_SUN_OPENPROMIO) += openprom.o obj-$(CONFIG_TADPOLE_TS102_UCTRL) += uctrl.o -obj-$(CONFIG_SUN_JSFLASH) += jsflash.o obj-$(CONFIG_BBC_I2C) += bbc.o obj-$(CONFIG_ORACLE_DAX) += oradax.o diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c deleted file mode 100644 index 14f377ac1280..000000000000 --- a/drivers/sbus/char/jsflash.c +++ /dev/null @@ -1,658 +0,0 @@ -/* - * drivers/sbus/char/jsflash.c - * - * Copyright (C) 1991, 1992 Linus Torvalds (drivers/char/mem.c) - * Copyright (C) 1997 Eddie C. Dost (drivers/sbus/char/flash.c) - * Copyright (C) 1997-2000 Pavel Machek <pavel@ucw.cz> (drivers/block/nbd.c) - * Copyright (C) 1999-2000 Pete Zaitcev - * - * This driver is used to program OS into a Flash SIMM on - * Krups and Espresso platforms. - * - * TODO: do not allow erase/programming if file systems are mounted. - * TODO: Erase/program both banks of a 8MB SIMM. - * - * It is anticipated that programming an OS Flash will be a routine - * procedure. In the same time it is exceedingly dangerous because - * a user can program its OBP flash with OS image and effectively - * kill the machine. - * - * This driver uses an interface different from Eddie's flash.c - * as a silly safeguard. - * - * XXX The flash.c manipulates page caching characteristics in a certain - * dubious way; also it assumes that remap_pfn_range() can remap - * PCI bus locations, which may be false. ioremap() must be used - * instead. We should discuss this. - */ - -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/fcntl.h> -#include <linux/poll.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/genhd.h> -#include <linux/blkdev.h> -#include <linux/uaccess.h> -#include <asm/pgtable.h> -#include <asm/io.h> -#include <asm/pcic.h> -#include <asm/oplib.h> - -#include <asm/jsflash.h> /* ioctl arguments. <linux/> ?? */ -#define JSFIDSZ (sizeof(struct jsflash_ident_arg)) -#define JSFPRGSZ (sizeof(struct jsflash_program_arg)) - -/* - * Our device numbers have no business in system headers. - * The only thing a user knows is the device name /dev/jsflash. - * - * Block devices are laid out like this: - * minor+0 - Bootstrap, for 8MB SIMM 0x20400000[0x800000] - * minor+1 - Filesystem to mount, normally 0x20400400[0x7ffc00] - * minor+2 - Whole flash area for any case... 0x20000000[0x01000000] - * Total 3 minors per flash device. - * - * It is easier to have static size vectors, so we define - * a total minor range JSF_MAX, which must cover all minors. - */ -/* character device */ -#define JSF_MINOR 178 /* 178 is registered with hpa */ -/* block device */ -#define JSF_MAX 3 /* 3 minors wasted total so far. */ -#define JSF_NPART 3 /* 3 minors per flash device */ -#define JSF_PART_BITS 2 /* 2 bits of minors to cover JSF_NPART */ -#define JSF_PART_MASK 0x3 /* 2 bits mask */ - -static DEFINE_MUTEX(jsf_mutex); - -/* - * Access functions. - * We could ioremap(), but it's easier this way. - */ -static unsigned int jsf_inl(unsigned long addr) -{ - unsigned long retval; - - __asm__ __volatile__("lda [%1] %2, %0\n\t" : - "=r" (retval) : - "r" (addr), "i" (ASI_M_BYPASS)); - return retval; -} - -static void jsf_outl(unsigned long addr, __u32 data) -{ - - __asm__ __volatile__("sta %0, [%1] %2\n\t" : : - "r" (data), "r" (addr), "i" (ASI_M_BYPASS) : - "memory"); -} - -/* - * soft carrier - */ - -struct jsfd_part { - unsigned long dbase; - unsigned long dsize; -}; - -struct jsflash { - unsigned long base; - unsigned long size; - unsigned long busy; /* In use? */ - struct jsflash_ident_arg id; - /* int mbase; */ /* Minor base, typically zero */ - struct jsfd_part dv[JSF_NPART]; -}; - -/* - * We do not map normal memory or obio as a safety precaution. - * But offsets are real, for ease of userland programming. - */ -#define JSF_BASE_TOP 0x30000000 -#define JSF_BASE_ALL 0x20000000 - -#define JSF_BASE_JK 0x20400000 - -/* - */ -static struct gendisk *jsfd_disk[JSF_MAX]; - -/* - * Let's pretend we may have several of these... - */ -static struct jsflash jsf0; - -/* - * Wait for AMD to finish its embedded algorithm. - * We use the Toggle bit DQ6 (0x40) because it does not - * depend on the data value as /DATA bit DQ7 does. - * - * XXX Do we need any timeout here? So far it never hanged, beware broken hw. - */ -static void jsf_wait(unsigned long p) { - unsigned int x1, x2; - - for (;;) { - x1 = jsf_inl(p); - x2 = jsf_inl(p); - if ((x1 & 0x40404040) == (x2 & 0x40404040)) return; - } -} - -/* - * Programming will only work if Flash is clean, - * we leave it to the programmer application. - * - * AMD must be programmed one byte at a time; - * thus, Simple Tech SIMM must be written 4 bytes at a time. - * - * Write waits for the chip to become ready after the write - * was finished. This is done so that application would read - * consistent data after the write is done. - */ -static void jsf_write4(unsigned long fa, u32 data) { - - jsf_outl(fa, 0xAAAAAAAA); /* Unlock 1 Write 1 */ - jsf_outl(fa, 0x55555555); /* Unlock 1 Write 2 */ - jsf_outl(fa, 0xA0A0A0A0); /* Byte Program */ - jsf_outl(fa, data); - - jsf_wait(fa); -} - -/* - */ -static void jsfd_read(char *buf, unsigned long p, size_t togo) { - union byte4 { - char s[4]; - unsigned int n; - } b; - - while (togo >= 4) { - togo -= 4; - b.n = jsf_inl(p); - memcpy(buf, b.s, 4); - p += 4; - buf += 4; - } -} - -static int jsfd_queue; - -static struct request *jsfd_next_request(void) -{ - struct request_queue *q; - struct request *rq; - int old_pos = jsfd_queue; - - do { - q = jsfd_disk[jsfd_queue]->queue; - if (++jsfd_queue == JSF_MAX) - jsfd_queue = 0; - if (q) { - rq = blk_fetch_request(q); - if (rq) - return rq; - } - } while (jsfd_queue != old_pos); - - return NULL; -} - -static void jsfd_request(void) -{ - struct request *req; - - req = jsfd_next_request(); - while (req) { - struct jsfd_part *jdp = req->rq_disk->private_data; - unsigned long offset = blk_rq_pos(req) << 9; - size_t len = blk_rq_cur_bytes(req); - blk_status_t err = BLK_STS_IOERR; - - if ((offset + len) > jdp->dsize) - goto end; - - if (rq_data_dir(req) != READ) { - printk(KERN_ERR "jsfd: write\n"); - goto end; - } - - if ((jdp->dbase & 0xff000000) != 0x20000000) { - printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase); - goto end; - } - - jsfd_read(bio_data(req->bio), jdp->dbase + offset, len); - err = BLK_STS_OK; - end: - if (!__blk_end_request_cur(req, err)) - req = jsfd_next_request(); - } -} - -static void jsfd_do_request(struct request_queue *q) -{ - jsfd_request(); -} - -/* - * The memory devices use the full 32/64 bits of the offset, and so we cannot - * check against negative addresses: they are ok. The return value is weird, - * though, in that case (0). - * - * also note that seeking relative to the "end of file" isn't supported: - * it has no meaning, so it returns -EINVAL. - */ -static loff_t jsf_lseek(struct file * file, loff_t offset, int orig) -{ - loff_t ret; - - mutex_lock(&jsf_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&jsf_mutex); - return ret; -} - -/* - * OS SIMM Cannot be read in other size but a 32bits word. - */ -static ssize_t jsf_read(struct file * file, char __user * buf, - size_t togo, loff_t *ppos) -{ - unsigned long p = *ppos; - char __user *tmp = buf; - - union byte4 { - char s[4]; - unsigned int n; - } b; - - if (p < JSF_BASE_ALL || p >= JSF_BASE_TOP) { - return 0; - } - - if ((p + togo) < p /* wrap */ - || (p + togo) >= JSF_BASE_TOP) { - togo = JSF_BASE_TOP - p; - } - - if (p < JSF_BASE_ALL && togo != 0) { -#if 0 /* __bzero XXX */ - size_t x = JSF_BASE_ALL - p; - if (x > togo) x = togo; - clear_user(tmp, x); - tmp += x; - p += x; - togo -= x; -#else - /* - * Implementation of clear_user() calls __bzero - * without regard to modversions, - * so we cannot build a module. - */ - return 0; -#endif - } - - while (togo >= 4) { - togo -= 4; - b.n = jsf_inl(p); - if (copy_to_user(tmp, b.s, 4)) - return -EFAULT; - tmp += 4; - p += 4; - } - - /* - * XXX Small togo may remain if 1 byte is ordered. - * It would be nice if we did a word size read and unpacked it. - */ - - *ppos = p; - return tmp-buf; -} - -static ssize_t jsf_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) -{ - return -ENOSPC; -} - -/* - */ -static int jsf_ioctl_erase(unsigned long arg) -{ - unsigned long p; - - /* p = jsf0.base; hits wrong bank */ - p = 0x20400000; - - jsf_outl(p, 0xAAAAAAAA); /* Unlock 1 Write 1 */ - jsf_outl(p, 0x55555555); /* Unlock 1 Write 2 */ - jsf_outl(p, 0x80808080); /* Erase setup */ - jsf_outl(p, 0xAAAAAAAA); /* Unlock 2 Write 1 */ - jsf_outl(p, 0x55555555); /* Unlock 2 Write 2 */ - jsf_outl(p, 0x10101010); /* Chip erase */ - -#if 0 - /* - * This code is ok, except that counter based timeout - * has no place in this world. Let's just drop timeouts... - */ - { - int i; - __u32 x; - for (i = 0; i < 1000000; i++) { - x = jsf_inl(p); - if ((x & 0x80808080) == 0x80808080) break; - } - if ((x & 0x80808080) != 0x80808080) { - printk("jsf0: erase timeout with 0x%08x\n", x); - } else { - printk("jsf0: erase done with 0x%08x\n", x); - } - } -#else - jsf_wait(p); -#endif - - return 0; -} - -/* - * Program a block of flash. - * Very simple because we can do it byte by byte anyway. - */ -static int jsf_ioctl_program(void __user *arg) -{ - struct jsflash_program_arg abuf; - char __user *uptr; - unsigned long p; - unsigned int togo; - union { - unsigned int n; - char s[4]; - } b; - - if (copy_from_user(&abuf, arg, JSFPRGSZ)) - return -EFAULT; - p = abuf.off; - togo = abuf.size; - if ((togo & 3) || (p & 3)) return -EINVAL; - - uptr = (char __user *) (unsigned long) abuf.data; - while (togo != 0) { - togo -= 4; - if (copy_from_user(&b.s[0], uptr, 4)) - return -EFAULT; - jsf_write4(p, b.n); - p += 4; - uptr += 4; - } - - return 0; -} - -static long jsf_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -{ - mutex_lock(&jsf_mutex); - int error = -ENOTTY; - void __user *argp = (void __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) { - mutex_unlock(&jsf_mutex); - return -EPERM; - } - switch (cmd) { - case JSFLASH_IDENT: - if (copy_to_user(argp, &jsf0.id, JSFIDSZ)) { - mutex_unlock(&jsf_mutex); - return -EFAULT; - } - break; - case JSFLASH_ERASE: - error = jsf_ioctl_erase(arg); - break; - case JSFLASH_PROGRAM: - error = jsf_ioctl_program(argp); - break; - } - - mutex_unlock(&jsf_mutex); - return error; -} - -static int jsf_mmap(struct file * file, struct vm_area_struct * vma) -{ - return -ENXIO; -} - -static int jsf_open(struct inode * inode, struct file * filp) -{ - mutex_lock(&jsf_mutex); - if (jsf0.base == 0) { - mutex_unlock(&jsf_mutex); - return -ENXIO; - } - if (test_and_set_bit(0, (void *)&jsf0.busy) != 0) { - mutex_unlock(&jsf_mutex); - return -EBUSY; - } - - mutex_unlock(&jsf_mutex); - return 0; /* XXX What security? */ -} - -static int jsf_release(struct inode *inode, struct file *file) -{ - jsf0.busy = 0; - return 0; -} - -static const struct file_operations jsf_fops = { - .owner = THIS_MODULE, - .llseek = jsf_lseek, - .read = jsf_read, - .write = jsf_write, - .unlocked_ioctl = jsf_ioctl, - .mmap = jsf_mmap, - .open = jsf_open, - .release = jsf_release, -}; - -static struct miscdevice jsf_dev = { JSF_MINOR, "jsflash", &jsf_fops }; - -static const struct block_device_operations jsfd_fops = { - .owner = THIS_MODULE, -}; - -static int jsflash_init(void) -{ - int rc; - struct jsflash *jsf; - phandle node; - char banner[128]; - struct linux_prom_registers reg0; - - node = prom_getchild(prom_root_node); - node = prom_searchsiblings(node, "flash-memory"); - if (node != 0 && (s32)node != -1) { - if (prom_getproperty(node, "reg", - (char *)®0, sizeof(reg0)) == -1) { - printk("jsflash: no \"reg\" property\n"); - return -ENXIO; - } - if (reg0.which_io != 0) { - printk("jsflash: bus number nonzero: 0x%x:%x\n", - reg0.which_io, reg0.phys_addr); - return -ENXIO; - } - /* - * Flash may be somewhere else, for instance on Ebus. - * So, don't do the following check for IIep flash space. - */ -#if 0 - if ((reg0.phys_addr >> 24) != 0x20) { - printk("jsflash: suspicious address: 0x%x:%x\n", - reg0.which_io, reg0.phys_addr); - return -ENXIO; - } -#endif - if ((int)reg0.reg_size <= 0) { - printk("jsflash: bad size 0x%x\n", (int)reg0.reg_size); - return -ENXIO; - } - } else { - /* XXX Remove this code once PROLL ID12 got widespread */ - printk("jsflash: no /flash-memory node, use PROLL >= 12\n"); - prom_getproperty(prom_root_node, "banner-name", banner, 128); - if (strcmp (banner, "JavaStation-NC") != 0 && - strcmp (banner, "JavaStation-E") != 0) { - return -ENXIO; - } - reg0.which_io = 0; - reg0.phys_addr = 0x20400000; - reg0.reg_size = 0x00800000; - } - - /* Let us be really paranoid for modifications to probing code. */ - if (sparc_cpu_model != sun4m) { - /* We must be on sun4m because we use MMU Bypass ASI. */ - return -ENXIO; - } - - if (jsf0.base == 0) { - jsf = &jsf0; - - jsf->base = reg0.phys_addr; - jsf->size = reg0.reg_size; - - /* XXX Redo the userland interface. */ - jsf->id.off = JSF_BASE_ALL; - jsf->id.size = 0x01000000; /* 16M - all segments */ - strcpy(jsf->id.name, "Krups_all"); - - jsf->dv[0].dbase = jsf->base; - jsf->dv[0].dsize = jsf->size; - jsf->dv[1].dbase = jsf->base + 1024; - jsf->dv[1].dsize = jsf->size - 1024; - jsf->dv[2].dbase = JSF_BASE_ALL; - jsf->dv[2].dsize = 0x01000000; - - printk("Espresso Flash @0x%lx [%d MB]\n", jsf->base, - (int) (jsf->size / (1024*1024))); - } - - if ((rc = misc_register(&jsf_dev)) != 0) { - printk(KERN_ERR "jsf: unable to get misc minor %d\n", - JSF_MINOR); - jsf0.base = 0; - return rc; - } - - return 0; -} - -static int jsfd_init(void) -{ - static DEFINE_SPINLOCK(lock); - struct jsflash *jsf; - struct jsfd_part *jdp; - int err; - int i; - - if (jsf0.base == 0) - return -ENXIO; - - err = -ENOMEM; - for (i = 0; i < JSF_MAX; i++) { - struct gendisk *disk = alloc_disk(1); - if (!disk) - goto out; - disk->queue = blk_init_queue(jsfd_do_request, &lock); - if (!disk->queue) { - put_disk(disk); - goto out; - } - blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); - jsfd_disk[i] = disk; - } - - if (register_blkdev(JSFD_MAJOR, "jsfd")) { - err = -EIO; - goto out; - } - - for (i = 0; i < JSF_MAX; i++) { - struct gendisk *disk = jsfd_disk[i]; - if ((i & JSF_PART_MASK) >= JSF_NPART) continue; - jsf = &jsf0; /* actually, &jsfv[i >> JSF_PART_BITS] */ - jdp = &jsf->dv[i&JSF_PART_MASK]; - - disk->major = JSFD_MAJOR; - disk->first_minor = i; - sprintf(disk->disk_name, "jsfd%d", i); - disk->fops = &jsfd_fops; - set_capacity(disk, jdp->dsize >> 9); - disk->private_data = jdp; - add_disk(disk); - set_disk_ro(disk, 1); - } - return 0; -out: - while (i--) - put_disk(jsfd_disk[i]); - return err; -} - -MODULE_LICENSE("GPL"); - -static int __init jsflash_init_module(void) { - int rc; - - if ((rc = jsflash_init()) == 0) { - jsfd_init(); - return 0; - } - return rc; -} - -static void __exit jsflash_cleanup_module(void) -{ - int i; - - for (i = 0; i < JSF_MAX; i++) { - if ((i & JSF_PART_MASK) >= JSF_NPART) continue; - del_gendisk(jsfd_disk[i]); - blk_cleanup_queue(jsfd_disk[i]->queue); - put_disk(jsfd_disk[i]); - } - if (jsf0.busy) - printk("jsf0: cleaning busy unit\n"); - jsf0.base = 0; - jsf0.busy = 0; - - misc_deregister(&jsf_dev); - unregister_blkdev(JSFD_MAJOR, "jsfd"); -} - -module_init(jsflash_init_module); -module_exit(jsflash_cleanup_module); diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index e29f9b8fd66d..56c940394729 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -182,7 +182,7 @@ zalon7xx-objs := zalon.o ncr53c8xx.o NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o # Files generated that shall be removed upon make clean -clean-files := 53c700_d.h 53c700_u.h +clean-files := 53c700_d.h 53c700_u.h scsi_devinfo_tbl.c $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index c35f05c4c6bb..85604795d8ee 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -3882,7 +3882,7 @@ static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp) struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); u8 b, t; unsigned long flags; - enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED; + enum blk_eh_timer_return retval = BLK_EH_DONE; TRACE(("%s() cmd 0x%x\n", scp->cmnd[0], __func__)); b = scp->device->channel; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 15a2fef51e38..71bdc0b52cf9 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1963,7 +1963,7 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) { - enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; + enum blk_eh_timer_return rc = BLK_EH_DONE; struct iscsi_task *task = NULL, *running_task; struct iscsi_cls_session *cls_session; struct iscsi_session *session; @@ -1982,7 +1982,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Raced with completion. Blk layer has taken ownership * so let timeout code complete it now. */ - rc = BLK_EH_HANDLED; + rc = BLK_EH_DONE; goto done; } @@ -1997,7 +1997,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) if (unlikely(system_state != SYSTEM_RUNNING)) { sc->result = DID_NO_CONNECT << 16; ISCSI_DBG_EH(session, "sc on shutdown, handled\n"); - rc = BLK_EH_HANDLED; + rc = BLK_EH_DONE; goto done; } /* diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 7195cff51d4c..91f5e2c68dbc 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -2731,53 +2731,6 @@ proc_show_rdrv_40(struct seq_file *m, void *v) return proc_show_rdrv(m, m->private, 30, 39); } - -/* - * seq_file wrappers for procfile show routines. - */ -static int mega_proc_open(struct inode *inode, struct file *file) -{ - adapter_t *adapter = proc_get_parent_data(inode); - int (*show)(struct seq_file *, void *) = PDE_DATA(inode); - - return single_open(file, show, adapter); -} - -static const struct file_operations mega_proc_fops = { - .open = mega_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* - * Table of proc files we need to create. - */ -struct mega_proc_file { - const char *name; - unsigned short ptr_offset; - int (*show) (struct seq_file *m, void *v); -}; - -static const struct mega_proc_file mega_proc_files[] = { - { "config", offsetof(adapter_t, proc_read), proc_show_config }, - { "stat", offsetof(adapter_t, proc_stat), proc_show_stat }, - { "mailbox", offsetof(adapter_t, proc_mbox), proc_show_mbox }, -#if MEGA_HAVE_ENH_PROC - { "rebuild-rate", offsetof(adapter_t, proc_rr), proc_show_rebuild_rate }, - { "battery-status", offsetof(adapter_t, proc_battery), proc_show_battery }, - { "diskdrives-ch0", offsetof(adapter_t, proc_pdrvstat[0]), proc_show_pdrv_ch0 }, - { "diskdrives-ch1", offsetof(adapter_t, proc_pdrvstat[1]), proc_show_pdrv_ch1 }, - { "diskdrives-ch2", offsetof(adapter_t, proc_pdrvstat[2]), proc_show_pdrv_ch2 }, - { "diskdrives-ch3", offsetof(adapter_t, proc_pdrvstat[3]), proc_show_pdrv_ch3 }, - { "raiddrives-0-9", offsetof(adapter_t, proc_rdrvstat[0]), proc_show_rdrv_10 }, - { "raiddrives-10-19", offsetof(adapter_t, proc_rdrvstat[1]), proc_show_rdrv_20 }, - { "raiddrives-20-29", offsetof(adapter_t, proc_rdrvstat[2]), proc_show_rdrv_30 }, - { "raiddrives-30-39", offsetof(adapter_t, proc_rdrvstat[3]), proc_show_rdrv_40 }, -#endif - { NULL } -}; - /** * mega_create_proc_entry() * @index - index in soft state array @@ -2788,31 +2741,45 @@ static const struct mega_proc_file mega_proc_files[] = { static void mega_create_proc_entry(int index, struct proc_dir_entry *parent) { - const struct mega_proc_file *f; - adapter_t *adapter = hba_soft_state[index]; - struct proc_dir_entry *dir, *de, **ppde; - u8 string[16]; + adapter_t *adapter = hba_soft_state[index]; + struct proc_dir_entry *dir; + u8 string[16]; sprintf(string, "hba%d", adapter->host->host_no); - - dir = adapter->controller_proc_dir_entry = - proc_mkdir_data(string, 0, parent, adapter); - if(!dir) { + dir = proc_mkdir_data(string, 0, parent, adapter); + if (!dir) { dev_warn(&adapter->dev->dev, "proc_mkdir failed\n"); return; } - for (f = mega_proc_files; f->name; f++) { - de = proc_create_data(f->name, S_IRUSR, dir, &mega_proc_fops, - f->show); - if (!de) { - dev_warn(&adapter->dev->dev, "proc_create failed\n"); - return; - } - - ppde = (void *)adapter + f->ptr_offset; - *ppde = de; - } + proc_create_single_data("config", S_IRUSR, dir, + proc_show_config, adapter); + proc_create_single_data("stat", S_IRUSR, dir, + proc_show_stat, adapter); + proc_create_single_data("mailbox", S_IRUSR, dir, + proc_show_mbox, adapter); +#if MEGA_HAVE_ENH_PROC + proc_create_single_data("rebuild-rate", S_IRUSR, dir, + proc_show_rebuild_rate, adapter); + proc_create_single_data("battery-status", S_IRUSR, dir, + proc_show_battery, adapter); + proc_create_single_data("diskdrives-ch0", S_IRUSR, dir, + proc_show_pdrv_ch0, adapter); + proc_create_single_data("diskdrives-ch1", S_IRUSR, dir, + proc_show_pdrv_ch1, adapter); + proc_create_single_data("diskdrives-ch2", S_IRUSR, dir, + proc_show_pdrv_ch2, adapter); + proc_create_single_data("diskdrives-ch3", S_IRUSR, dir, + proc_show_pdrv_ch3, adapter); + proc_create_single_data("raiddrives-0-9", S_IRUSR, dir, + proc_show_rdrv_10, adapter); + proc_create_single_data("raiddrives-10-19", S_IRUSR, dir, + proc_show_rdrv_20, adapter); + proc_create_single_data("raiddrives-20-29", S_IRUSR, dir, + proc_show_rdrv_30, adapter); + proc_create_single_data("raiddrives-30-39", S_IRUSR, dir, + proc_show_rdrv_40, adapter); +#endif } #else @@ -4580,6 +4547,7 @@ megaraid_remove_one(struct pci_dev *pdev) { struct Scsi_Host *host = pci_get_drvdata(pdev); adapter_t *adapter = (adapter_t *)host->hostdata; + char buf[12] = { 0 }; scsi_remove_host(host); @@ -4594,44 +4562,8 @@ megaraid_remove_one(struct pci_dev *pdev) mega_free_sgl(adapter); -#ifdef CONFIG_PROC_FS - if (adapter->controller_proc_dir_entry) { - remove_proc_entry("stat", adapter->controller_proc_dir_entry); - remove_proc_entry("config", - adapter->controller_proc_dir_entry); - remove_proc_entry("mailbox", - adapter->controller_proc_dir_entry); -#if MEGA_HAVE_ENH_PROC - remove_proc_entry("rebuild-rate", - adapter->controller_proc_dir_entry); - remove_proc_entry("battery-status", - adapter->controller_proc_dir_entry); - - remove_proc_entry("diskdrives-ch0", - adapter->controller_proc_dir_entry); - remove_proc_entry("diskdrives-ch1", - adapter->controller_proc_dir_entry); - remove_proc_entry("diskdrives-ch2", - adapter->controller_proc_dir_entry); - remove_proc_entry("diskdrives-ch3", - adapter->controller_proc_dir_entry); - - remove_proc_entry("raiddrives-0-9", - adapter->controller_proc_dir_entry); - remove_proc_entry("raiddrives-10-19", - adapter->controller_proc_dir_entry); - remove_proc_entry("raiddrives-20-29", - adapter->controller_proc_dir_entry); - remove_proc_entry("raiddrives-30-39", - adapter->controller_proc_dir_entry); -#endif - { - char buf[12] = { 0 }; - sprintf(buf, "hba%d", adapter->host->host_no); - remove_proc_entry(buf, mega_proc_dir_entry); - } - } -#endif + sprintf(buf, "hba%d", adapter->host->host_no); + remove_proc_subtree(buf, mega_proc_dir_entry); pci_free_consistent(adapter->dev, MEGA_BUFFER_SIZE, adapter->mega_buffer, adapter->buf_dma_handle); diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h index 21eba2fd465a..18e85d9267ff 100644 --- a/drivers/scsi/megaraid.h +++ b/drivers/scsi/megaraid.h @@ -814,18 +814,6 @@ typedef struct { #ifdef CONFIG_PROC_FS struct proc_dir_entry *controller_proc_dir_entry; - struct proc_dir_entry *proc_read; - struct proc_dir_entry *proc_stat; - struct proc_dir_entry *proc_mbox; - -#if MEGA_HAVE_ENH_PROC - struct proc_dir_entry *proc_rr; - struct proc_dir_entry *proc_battery; -#define MAX_PROC_CHANNELS 4 - struct proc_dir_entry *proc_pdrvstat[MAX_PROC_CHANNELS]; - struct proc_dir_entry *proc_rdrvstat[MAX_PROC_CHANNELS]; -#endif - #endif int has_64bit_addr; /* are we using 64-bit addressing */ diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index b89c6e6c0589..ce656c466ca9 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2772,7 +2772,7 @@ blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) if (time_after(jiffies, scmd->jiffies_at_alloc + (scmd_timeout * 2) * HZ)) { - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; } instance = (struct megasas_instance *)scmd->device->host->hostdata; diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index fe97401ad192..afd27165cd93 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -2155,7 +2155,7 @@ static enum blk_eh_timer_return mvumi_timed_out(struct scsi_cmnd *scmd) mvumi_return_cmd(mhba, cmd); spin_unlock_irqrestore(mhba->shost->host_lock, flags); - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; } static int diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index e18877177f1b..5a33e1ad9881 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -99,7 +99,7 @@ static int _osd_get_print_system_info(struct osd_dev *od, int nelem = ARRAY_SIZE(get_attrs), a = 0; int ret; - or = osd_start_request(od, GFP_KERNEL); + or = osd_start_request(od); if (!or) return -ENOMEM; @@ -409,16 +409,15 @@ static void _osd_request_free(struct osd_request *or) kfree(or); } -struct osd_request *osd_start_request(struct osd_dev *dev, gfp_t gfp) +struct osd_request *osd_start_request(struct osd_dev *dev) { struct osd_request *or; - or = _osd_request_alloc(gfp); + or = _osd_request_alloc(GFP_KERNEL); if (!or) return NULL; or->osd_dev = dev; - or->alloc_flags = gfp; or->timeout = dev->def_timeout; or->retries = OSD_REQ_RETRIES; @@ -546,7 +545,7 @@ static int _osd_realloc_seg(struct osd_request *or, if (seg->alloc_size >= max_bytes) return 0; - buff = krealloc(seg->buff, max_bytes, or->alloc_flags); + buff = krealloc(seg->buff, max_bytes, GFP_KERNEL); if (!buff) { OSD_ERR("Failed to Realloc %d-bytes was-%d\n", max_bytes, seg->alloc_size); @@ -728,7 +727,7 @@ static int _osd_req_list_objects(struct osd_request *or, _osd_req_encode_olist(or, list); WARN_ON(or->in.bio); - bio = bio_map_kern(q, list, len, or->alloc_flags); + bio = bio_map_kern(q, list, len, GFP_KERNEL); if (IS_ERR(bio)) { OSD_ERR("!!! Failed to allocate list_objects BIO\n"); return PTR_ERR(bio); @@ -1190,14 +1189,14 @@ static int _req_append_segment(struct osd_request *or, pad_buff = io->pad_buff; ret = blk_rq_map_kern(io->req->q, io->req, pad_buff, padding, - or->alloc_flags); + GFP_KERNEL); if (ret) return ret; io->total_bytes += padding; } ret = blk_rq_map_kern(io->req->q, io->req, seg->buff, seg->total_bytes, - or->alloc_flags); + GFP_KERNEL); if (ret) return ret; @@ -1564,14 +1563,14 @@ static int _osd_req_finalize_data_integrity(struct osd_request *or, * osd_finalize_request and helpers */ static struct request *_make_request(struct request_queue *q, bool has_write, - struct _osd_io_info *oii, gfp_t flags) + struct _osd_io_info *oii) { struct request *req; struct bio *bio = oii->bio; int ret; req = blk_get_request(q, has_write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, - flags); + 0); if (IS_ERR(req)) return req; @@ -1589,13 +1588,12 @@ static struct request *_make_request(struct request_queue *q, bool has_write, static int _init_blk_request(struct osd_request *or, bool has_in, bool has_out) { - gfp_t flags = or->alloc_flags; struct scsi_device *scsi_device = or->osd_dev->scsi_device; struct request_queue *q = scsi_device->request_queue; struct request *req; int ret; - req = _make_request(q, has_out, has_out ? &or->out : &or->in, flags); + req = _make_request(q, has_out, has_out ? &or->out : &or->in); if (IS_ERR(req)) { ret = PTR_ERR(req); goto out; @@ -1611,7 +1609,7 @@ static int _init_blk_request(struct osd_request *or, or->out.req = req; if (has_in) { /* allocate bidi request */ - req = _make_request(q, false, &or->in, flags); + req = _make_request(q, false, &or->in); if (IS_ERR(req)) { OSD_DEBUG("blk_get_request for bidi failed\n"); ret = PTR_ERR(req); diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 20ec1c01dbd5..2bbe797f8c3d 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -368,7 +368,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, int write = (data_direction == DMA_TO_DEVICE); req = blk_get_request(SRpnt->stp->device->request_queue, - write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); + write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(req)) return DRIVER_ERROR << 24; diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 94c14ce94da2..0e13349dce57 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1848,7 +1848,7 @@ static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc) struct iscsi_cls_session *session; struct iscsi_session *sess; unsigned long flags; - enum blk_eh_timer_return ret = BLK_EH_NOT_HANDLED; + enum blk_eh_timer_return ret = BLK_EH_DONE; session = starget_to_session(scsi_target(sc->device)); sess = session->dd_data; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 946039117bf4..9c02ba2e7ef3 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -282,7 +282,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) enum blk_eh_timer_return scsi_times_out(struct request *req) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); - enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; + enum blk_eh_timer_return rtn = BLK_EH_DONE; struct Scsi_Host *host = scmd->device->host; trace_scsi_dispatch_cmd_timeout(scmd); @@ -294,7 +294,7 @@ enum blk_eh_timer_return scsi_times_out(struct request *req) if (host->hostt->eh_timed_out) rtn = host->hostt->eh_timed_out(scmd); - if (rtn == BLK_EH_NOT_HANDLED) { + if (rtn == BLK_EH_DONE) { if (scsi_abort_command(scmd) != SUCCESS) { set_host_byte(scmd, DID_TIME_OUT); scsi_eh_scmd_add(scmd); @@ -1933,11 +1933,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) struct request *req; struct scsi_request *rq; - /* - * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a - * request becomes available - */ - req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, GFP_KERNEL); + req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, 0); if (IS_ERR(req)) return; rq = scsi_req(req); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e9b4f279d29c..fb38aeff9dbd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -265,7 +265,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, struct scsi_request *rq; int ret = DRIVER_ERROR << 24; - req = blk_get_request_flags(sdev->request_queue, + req = blk_get_request(sdev->request_queue, data_direction == DMA_TO_DEVICE ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT); if (IS_ERR(req)) @@ -273,7 +273,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, rq = scsi_req(req); if (bufflen && blk_rq_map_kern(sdev->request_queue, req, - buffer, bufflen, __GFP_RECLAIM)) + buffer, bufflen, GFP_NOIO)) goto out; rq->cmd_len = COMMAND_SIZE(cmd[0]); @@ -2149,27 +2149,6 @@ static int scsi_map_queues(struct blk_mq_tag_set *set) return blk_mq_map_queues(set); } -static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost) -{ - struct device *host_dev; - u64 bounce_limit = 0xffffffff; - - if (shost->unchecked_isa_dma) - return BLK_BOUNCE_ISA; - /* - * Platforms with virtual-DMA translation - * hardware have no practical limit. - */ - if (!PCI_DMA_BUS_IS_PHYS) - return BLK_BOUNCE_ANY; - - host_dev = scsi_get_device(shost); - if (host_dev && host_dev->dma_mask) - bounce_limit = (u64)dma_max_pfn(host_dev) << PAGE_SHIFT; - - return bounce_limit; -} - void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = shost->dma_dev; @@ -2189,7 +2168,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) } blk_queue_max_hw_sectors(q, shost->max_sectors); - blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); + if (shost->unchecked_isa_dma) + blk_queue_bounce_limit(q, BLK_BOUNCE_ISA); blk_queue_segment_boundary(q, shost->dma_boundary); dma_set_seg_boundary(dev, shost->dma_boundary); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index be3be0f9cb2d..1da3d71e9f61 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -2087,7 +2087,7 @@ fc_eh_timed_out(struct scsi_cmnd *scmd) if (rport->port_state == FC_PORTSTATE_BLOCKED) return BLK_EH_RESET_TIMER; - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; } EXPORT_SYMBOL(fc_eh_timed_out); @@ -3591,10 +3591,9 @@ fc_bsg_job_timeout(struct request *req) } /* the blk_end_sync_io() doesn't check the error */ - if (!inflight) - return BLK_EH_NOT_HANDLED; - else - return BLK_EH_HANDLED; + if (inflight) + blk_mq_complete_request(req); + return BLK_EH_DONE; } /** @@ -3781,8 +3780,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size, - NULL); + q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "fc_host%d: bsg interface failed to initialize - setup queue\n", @@ -3827,8 +3825,8 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) if (!i->f->bsg_request) return -ENOTSUPP; - q = bsg_setup_queue(dev, NULL, fc_bsg_dispatch, i->f->dd_bsg_size, - NULL); + q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch, + i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); return PTR_ERR(q); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 65f6c94f2e9b..6fd2fe210fc3 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1542,7 +1542,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0, NULL); + q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 08acbabfae07..e2953b416746 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -187,16 +187,6 @@ static int sas_smp_dispatch(struct bsg_job *job) return 0; } -static void sas_host_release(struct device *dev) -{ - struct Scsi_Host *shost = dev_to_shost(dev); - struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); - struct request_queue *q = sas_host->q; - - if (q) - blk_cleanup_queue(q); -} - static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) { struct request_queue *q; @@ -208,7 +198,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) if (rphy) { q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev), - sas_smp_dispatch, 0, NULL); + sas_smp_dispatch, 0); if (IS_ERR(q)) return PTR_ERR(q); rphy->q = q; @@ -217,7 +207,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) snprintf(name, sizeof(name), "sas_host%d", shost->host_no); q = bsg_setup_queue(&shost->shost_gendev, name, - sas_smp_dispatch, 0, sas_host_release); + sas_smp_dispatch, 0); if (IS_ERR(q)) return PTR_ERR(q); to_sas_host_attrs(shost)->q = q; @@ -260,8 +250,11 @@ static int sas_host_remove(struct transport_container *tc, struct device *dev, struct Scsi_Host *shost = dev_to_shost(dev); struct request_queue *q = to_sas_host_attrs(shost)->q; - if (q) + if (q) { bsg_unregister_queue(q); + blk_cleanup_queue(q); + } + return 0; } diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 36f6190931bc..4e46fdb2d7c9 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -51,6 +51,8 @@ struct srp_internal { struct transport_container rport_attr_cont; }; +static int scsi_is_srp_rport(const struct device *dev); + #define to_srp_internal(tmpl) container_of(tmpl, struct srp_internal, t) #define dev_to_rport(d) container_of(d, struct srp_rport, dev) @@ -60,9 +62,24 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) return dev_to_shost(r->dev.parent); } +static int find_child_rport(struct device *dev, void *data) +{ + struct device **child = data; + + if (scsi_is_srp_rport(dev)) { + WARN_ON_ONCE(*child); + *child = dev; + } + return 0; +} + static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost) { - return transport_class_to_srp_rport(&shost->shost_gendev); + struct device *child = NULL; + + WARN_ON_ONCE(device_for_each_child(&shost->shost_gendev, &child, + find_child_rport) < 0); + return child ? dev_to_rport(child) : NULL; } /** @@ -587,7 +604,7 @@ EXPORT_SYMBOL(srp_reconnect_rport); * * If a timeout occurs while an rport is in the blocked state, ask the SCSI * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core - * handle the timeout (BLK_EH_NOT_HANDLED). + * handle the timeout (BLK_EH_DONE). * * Note: This function is called from soft-IRQ context and with the request * queue lock held. @@ -600,9 +617,10 @@ enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) struct srp_rport *rport = shost_to_rport(shost); pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev)); - return rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 && + return rport && rport->fast_io_fail_tmo < 0 && + rport->dev_loss_tmo < 0 && i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? - BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; + BLK_EH_RESET_TIMER : BLK_EH_DONE; } EXPORT_SYMBOL(srp_timed_out); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c198b96368dd..6fc58e2c99d3 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -66,7 +66,6 @@ static int sg_version_num = 30536; /* 2 digits for each component */ static char *sg_version_date = "20140603"; static int sg_proc_init(void); -static void sg_proc_cleanup(void); #endif #define SG_ALLOW_DIO_DEF 0 @@ -1661,7 +1660,7 @@ static void __exit exit_sg(void) { #ifdef CONFIG_SCSI_PROC_FS - sg_proc_cleanup(); + remove_proc_subtree("scsi/sg", NULL); #endif /* CONFIG_SCSI_PROC_FS */ scsi_unregister_interface(&sg_interface); class_destroy(sg_sysfs_class); @@ -1715,7 +1714,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) * does not sleep except under memory pressure. */ rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) { kfree(long_cmdp); return PTR_ERR(rq); @@ -1894,7 +1893,7 @@ retry: num = (rem_sz > scatter_elem_sz_prev) ? scatter_elem_sz_prev : rem_sz; - schp->pages[k] = alloc_pages(gfp_mask, order); + schp->pages[k] = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!schp->pages[k]) goto out; @@ -2274,11 +2273,6 @@ sg_get_dev(int dev) } #ifdef CONFIG_SCSI_PROC_FS - -static struct proc_dir_entry *sg_proc_sgp = NULL; - -static char sg_proc_sg_dirname[] = "scsi/sg"; - static int sg_proc_seq_show_int(struct seq_file *s, void *v); static int sg_proc_single_open_adio(struct inode *inode, struct file *file); @@ -2306,37 +2300,11 @@ static const struct file_operations dressz_fops = { }; static int sg_proc_seq_show_version(struct seq_file *s, void *v); -static int sg_proc_single_open_version(struct inode *inode, struct file *file); -static const struct file_operations version_fops = { - .owner = THIS_MODULE, - .open = sg_proc_single_open_version, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v); -static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file); -static const struct file_operations devhdr_fops = { - .owner = THIS_MODULE, - .open = sg_proc_single_open_devhdr, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int sg_proc_seq_show_dev(struct seq_file *s, void *v); -static int sg_proc_open_dev(struct inode *inode, struct file *file); static void * dev_seq_start(struct seq_file *s, loff_t *pos); static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos); static void dev_seq_stop(struct seq_file *s, void *v); -static const struct file_operations dev_fops = { - .owner = THIS_MODULE, - .open = sg_proc_open_dev, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, @@ -2345,14 +2313,6 @@ static const struct seq_operations dev_seq_ops = { }; static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v); -static int sg_proc_open_devstrs(struct inode *inode, struct file *file); -static const struct file_operations devstrs_fops = { - .owner = THIS_MODULE, - .open = sg_proc_open_devstrs, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; static const struct seq_operations devstrs_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, @@ -2361,14 +2321,6 @@ static const struct seq_operations devstrs_seq_ops = { }; static int sg_proc_seq_show_debug(struct seq_file *s, void *v); -static int sg_proc_open_debug(struct inode *inode, struct file *file); -static const struct file_operations debug_fops = { - .owner = THIS_MODULE, - .open = sg_proc_open_debug, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; static const struct seq_operations debug_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, @@ -2376,50 +2328,23 @@ static const struct seq_operations debug_seq_ops = { .show = sg_proc_seq_show_debug, }; - -struct sg_proc_leaf { - const char * name; - const struct file_operations * fops; -}; - -static const struct sg_proc_leaf sg_proc_leaf_arr[] = { - {"allow_dio", &adio_fops}, - {"debug", &debug_fops}, - {"def_reserved_size", &dressz_fops}, - {"device_hdr", &devhdr_fops}, - {"devices", &dev_fops}, - {"device_strs", &devstrs_fops}, - {"version", &version_fops} -}; - static int sg_proc_init(void) { - int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr); - int k; + struct proc_dir_entry *p; - sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL); - if (!sg_proc_sgp) + p = proc_mkdir("scsi/sg", NULL); + if (!p) return 1; - for (k = 0; k < num_leaves; ++k) { - const struct sg_proc_leaf *leaf = &sg_proc_leaf_arr[k]; - umode_t mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO; - proc_create(leaf->name, mask, sg_proc_sgp, leaf->fops); - } - return 0; -} -static void -sg_proc_cleanup(void) -{ - int k; - int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr); - - if (!sg_proc_sgp) - return; - for (k = 0; k < num_leaves; ++k) - remove_proc_entry(sg_proc_leaf_arr[k].name, sg_proc_sgp); - remove_proc_entry(sg_proc_sg_dirname, NULL); + proc_create("allow_dio", S_IRUGO | S_IWUSR, p, &adio_fops); + proc_create_seq("debug", S_IRUGO, p, &debug_seq_ops); + proc_create("def_reserved_size", S_IRUGO | S_IWUSR, p, &dressz_fops); + proc_create_single("device_hdr", S_IRUGO, p, sg_proc_seq_show_devhdr); + proc_create_seq("devices", S_IRUGO, p, &dev_seq_ops); + proc_create_seq("device_strs", S_IRUGO, p, &devstrs_seq_ops); + proc_create_single("version", S_IRUGO, p, sg_proc_seq_show_version); + return 0; } @@ -2482,22 +2407,12 @@ static int sg_proc_seq_show_version(struct seq_file *s, void *v) return 0; } -static int sg_proc_single_open_version(struct inode *inode, struct file *file) -{ - return single_open(file, sg_proc_seq_show_version, NULL); -} - static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v) { seq_puts(s, "host\tchan\tid\tlun\ttype\topens\tqdepth\tbusy\tonline\n"); return 0; } -static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file) -{ - return single_open(file, sg_proc_seq_show_devhdr, NULL); -} - struct sg_proc_deviter { loff_t index; size_t max; @@ -2531,11 +2446,6 @@ static void dev_seq_stop(struct seq_file *s, void *v) kfree(s->private); } -static int sg_proc_open_dev(struct inode *inode, struct file *file) -{ - return seq_open(file, &dev_seq_ops); -} - static int sg_proc_seq_show_dev(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; @@ -2562,11 +2472,6 @@ static int sg_proc_seq_show_dev(struct seq_file *s, void *v) return 0; } -static int sg_proc_open_devstrs(struct inode *inode, struct file *file) -{ - return seq_open(file, &devstrs_seq_ops); -} - static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; @@ -2650,11 +2555,6 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) } } -static int sg_proc_open_debug(struct inode *inode, struct file *file) -{ - return seq_open(file, &debug_seq_ops); -} - static int sg_proc_seq_show_debug(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 2a21f2d48592..35fab1e18adc 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -188,9 +188,13 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) struct scsi_device *SDev; struct scsi_sense_hdr sshdr; int result, err = 0, retries = 0; + unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL; SDev = cd->device; + if (cgc->sense) + senseptr = sense_buffer; + retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; @@ -198,10 +202,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) } result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, - (unsigned char *)cgc->sense, &sshdr, + cgc->buffer, cgc->buflen, senseptr, &sshdr, cgc->timeout, IOCTL_RETRIES, 0, 0, NULL); + if (cgc->sense) + memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense)); + /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { switch (sshdr.sense_key) { diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 6c399480783d..a427ce9497be 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -545,7 +545,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, req = blk_get_request(SRpnt->stp->device->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(req)) return DRIVER_ERROR << 24; rq = scsi_req(req); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 00e79057f870..d0a1674915a1 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6497,12 +6497,12 @@ static enum blk_eh_timer_return ufshcd_eh_timed_out(struct scsi_cmnd *scmd) bool found = false; if (!scmd || !scmd->device || !scmd->device->host) - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; host = scmd->device->host; hba = shost_priv(host); if (!hba) - return BLK_EH_NOT_HANDLED; + return BLK_EH_DONE; spin_lock_irqsave(host->host_lock, flags); @@ -6520,7 +6520,7 @@ static enum blk_eh_timer_return ufshcd_eh_timed_out(struct scsi_cmnd *scmd) * SCSI command was not actually dispatched to UFS driver, otherwise * let SCSI layer handle the error as usual. */ - return found ? BLK_EH_NOT_HANDLED : BLK_EH_RESET_TIMER; + return found ? BLK_EH_DONE : BLK_EH_RESET_TIMER; } static const struct attribute_group *ufshcd_driver_groups[] = { diff --git a/drivers/soc/lantiq/gphy.c b/drivers/soc/lantiq/gphy.c index 8d8659463b3e..feeb17cebc25 100644 --- a/drivers/soc/lantiq/gphy.c +++ b/drivers/soc/lantiq/gphy.c @@ -30,7 +30,6 @@ struct xway_gphy_priv { struct clk *gphy_clk_gate; struct reset_control *gphy_reset; struct reset_control *gphy_reset2; - struct notifier_block gphy_reboot_nb; void __iomem *membase; char *fw_name; }; @@ -64,24 +63,6 @@ static const struct of_device_id xway_gphy_match[] = { }; MODULE_DEVICE_TABLE(of, xway_gphy_match); -static struct xway_gphy_priv *to_xway_gphy_priv(struct notifier_block *nb) -{ - return container_of(nb, struct xway_gphy_priv, gphy_reboot_nb); -} - -static int xway_gphy_reboot_notify(struct notifier_block *reboot_nb, - unsigned long code, void *unused) -{ - struct xway_gphy_priv *priv = to_xway_gphy_priv(reboot_nb); - - if (priv) { - reset_control_assert(priv->gphy_reset); - reset_control_assert(priv->gphy_reset2); - } - - return NOTIFY_DONE; -} - static int xway_gphy_load(struct device *dev, struct xway_gphy_priv *priv, dma_addr_t *dev_addr) { @@ -205,14 +186,6 @@ static int xway_gphy_probe(struct platform_device *pdev) reset_control_deassert(priv->gphy_reset); reset_control_deassert(priv->gphy_reset2); - /* assert the gphy reset because it can hang after a reboot: */ - priv->gphy_reboot_nb.notifier_call = xway_gphy_reboot_notify; - priv->gphy_reboot_nb.priority = -1; - - ret = register_reboot_notifier(&priv->gphy_reboot_nb); - if (ret) - dev_warn(dev, "Failed to register reboot notifier\n"); - platform_set_drvdata(pdev, priv); return ret; @@ -220,21 +193,12 @@ static int xway_gphy_probe(struct platform_device *pdev) static int xway_gphy_remove(struct platform_device *pdev) { - struct device *dev = &pdev->dev; struct xway_gphy_priv *priv = platform_get_drvdata(pdev); - int ret; - - reset_control_assert(priv->gphy_reset); - reset_control_assert(priv->gphy_reset2); iowrite32be(0, priv->membase); clk_disable_unprepare(priv->gphy_clk_gate); - ret = unregister_reboot_notifier(&priv->gphy_reboot_nb); - if (ret) - dev_warn(dev, "Failed to unregister reboot notifier\n"); - return 0; } diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index 9371651d8017..c574dd210500 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -117,7 +117,7 @@ config SSB_SERIAL config SSB_DRIVER_PCICORE_POSSIBLE bool - depends on SSB_PCIHOST && SSB = y + depends on SSB_PCIHOST default y config SSB_DRIVER_PCICORE @@ -131,7 +131,7 @@ config SSB_DRIVER_PCICORE config SSB_PCICORE_HOSTMODE bool "Hostmode support for SSB PCI core" - depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS + depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS && SSB = y help PCIcore hostmode operation (external PCI bus). diff --git a/drivers/staging/comedi/proc.c b/drivers/staging/comedi/proc.c index 50d38938ac6f..8bc8e42beb90 100644 --- a/drivers/staging/comedi/proc.c +++ b/drivers/staging/comedi/proc.c @@ -62,25 +62,9 @@ static int comedi_read(struct seq_file *m, void *v) return 0; } -/* - * seq_file wrappers for procfile show routines. - */ -static int comedi_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, comedi_read, NULL); -} - -static const struct file_operations comedi_proc_fops = { - .owner = THIS_MODULE, - .open = comedi_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void __init comedi_proc_init(void) { - if (!proc_create("comedi", 0444, NULL, &comedi_proc_fops)) + if (!proc_create_single("comedi", 0444, NULL, comedi_read)) pr_warn("comedi: unable to create proc entry\n"); } diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c index e8bfe5520bc7..fa0dd425b454 100644 --- a/drivers/staging/fwserial/fwserial.c +++ b/drivers/staging/fwserial/fwserial.c @@ -1506,11 +1506,6 @@ static int fwtty_debugfs_peers_show(struct seq_file *m, void *v) return 0; } -static int fwtty_proc_open(struct inode *inode, struct file *fp) -{ - return single_open(fp, fwtty_proc_show, NULL); -} - static int fwtty_stats_open(struct inode *inode, struct file *fp) { return single_open(fp, fwtty_debugfs_stats_show, inode->i_private); @@ -1537,14 +1532,6 @@ static const struct file_operations fwtty_peers_fops = { .release = single_release, }; -static const struct file_operations fwtty_proc_fops = { - .owner = THIS_MODULE, - .open = fwtty_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct tty_port_operations fwtty_port_ops = { .dtr_rts = fwtty_port_dtr_rts, .carrier_raised = fwtty_port_carrier_raised, @@ -1570,7 +1557,7 @@ static const struct tty_operations fwtty_ops = { .tiocmget = fwtty_tiocmget, .tiocmset = fwtty_tiocmset, .get_icount = fwtty_get_icount, - .proc_fops = &fwtty_proc_fops, + .proc_show = fwtty_proc_show, }; static const struct tty_operations fwloop_ops = { diff --git a/drivers/staging/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c index b9232e4e2ed4..360f0ad970de 100644 --- a/drivers/staging/ipx/ipx_proc.c +++ b/drivers/staging/ipx/ipx_proc.c @@ -244,42 +244,6 @@ static const struct seq_operations ipx_seq_socket_ops = { .show = ipx_seq_socket_show, }; -static int ipx_seq_route_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ipx_seq_route_ops); -} - -static int ipx_seq_interface_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ipx_seq_interface_ops); -} - -static int ipx_seq_socket_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ipx_seq_socket_ops); -} - -static const struct file_operations ipx_seq_interface_fops = { - .open = ipx_seq_interface_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations ipx_seq_route_fops = { - .open = ipx_seq_route_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations ipx_seq_socket_fops = { - .open = ipx_seq_socket_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static struct proc_dir_entry *ipx_proc_dir; int __init ipx_proc_init(void) @@ -291,16 +255,17 @@ int __init ipx_proc_init(void) if (!ipx_proc_dir) goto out; - p = proc_create("interface", S_IRUGO, - ipx_proc_dir, &ipx_seq_interface_fops); + p = proc_create_seq("interface", S_IRUGO, ipx_proc_dir, + &ipx_seq_interface_ops); if (!p) goto out_interface; - p = proc_create("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_fops); + p = proc_create_seq("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_ops); if (!p) goto out_route; - p = proc_create("socket", S_IRUGO, ipx_proc_dir, &ipx_seq_socket_fops); + p = proc_create_seq("socket", S_IRUGO, ipx_proc_dir, + &ipx_seq_socket_ops); if (!p) goto out_socket; diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index d607c59761cf..7a0dbc0fa18e 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -646,64 +646,25 @@ static void rtl8192_proc_module_init(void) rtl8192_proc = proc_mkdir(RTL819xU_MODULE_NAME, init_net.proc_net); } -/* - * seq_file wrappers for procfile show routines. - */ -static int rtl8192_proc_open(struct inode *inode, struct file *file) -{ - struct net_device *dev = proc_get_parent_data(inode); - int (*show)(struct seq_file *, void *) = PDE_DATA(inode); - - return single_open(file, show, dev); -} - -static const struct file_operations rtl8192_proc_fops = { - .open = rtl8192_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* - * Table of proc files we need to create. - */ -struct rtl8192_proc_file { - char name[12]; - int (*show)(struct seq_file *, void *); -}; - -static const struct rtl8192_proc_file rtl8192_proc_files[] = { - { "stats-rx", &proc_get_stats_rx }, - { "stats-tx", &proc_get_stats_tx }, - { "stats-ap", &proc_get_stats_ap }, - { "registers", &proc_get_registers }, - { "" } -}; - static void rtl8192_proc_init_one(struct net_device *dev) { - const struct rtl8192_proc_file *f; struct proc_dir_entry *dir; - if (rtl8192_proc) { - dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev); - if (!dir) { - RT_TRACE(COMP_ERR, - "Unable to initialize /proc/net/rtl8192/%s\n", - dev->name); - return; - } + if (!rtl8192_proc) + return; - for (f = rtl8192_proc_files; f->name[0]; f++) { - if (!proc_create_data(f->name, S_IFREG | S_IRUGO, dir, - &rtl8192_proc_fops, f->show)) { - RT_TRACE(COMP_ERR, - "Unable to initialize /proc/net/rtl8192/%s/%s\n", - dev->name, f->name); - return; - } - } - } + dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev); + if (!dir) + return; + + proc_create_single("stats-rx", S_IFREG | S_IRUGO, dir, + proc_get_stats_rx); + proc_create_single("stats-tx", S_IFREG | S_IRUGO, dir, + proc_get_stats_tx); + proc_create_single("stats-ap", S_IFREG | S_IRUGO, dir, + proc_get_stats_ap); + proc_create_single("registers", S_IFREG | S_IRUGO, dir, + proc_get_registers); } static void rtl8192_proc_remove_one(struct net_device *dev) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 60429011292a..ce1321a5cb7b 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -94,8 +94,8 @@ static int iblock_configure_device(struct se_device *dev) return -EINVAL; } - ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!ib_dev->ibd_bio_set) { + ret = bioset_init(&ib_dev->ibd_bio_set, IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (ret) { pr_err("IBLOCK: Unable to create bioset\n"); goto out; } @@ -141,7 +141,7 @@ static int iblock_configure_device(struct se_device *dev) bi = bdev_get_integrity(bd); if (bi) { - struct bio_set *bs = ib_dev->ibd_bio_set; + struct bio_set *bs = &ib_dev->ibd_bio_set; if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") || !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) { @@ -164,7 +164,7 @@ static int iblock_configure_device(struct se_device *dev) goto out_blkdev_put; } pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n", - bs->bio_integrity_pool); + &bs->bio_integrity_pool); } dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type; } @@ -174,8 +174,7 @@ static int iblock_configure_device(struct se_device *dev) out_blkdev_put: blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); out_free_bioset: - bioset_free(ib_dev->ibd_bio_set); - ib_dev->ibd_bio_set = NULL; + bioset_exit(&ib_dev->ibd_bio_set); out: return ret; } @@ -199,8 +198,7 @@ static void iblock_destroy_device(struct se_device *dev) if (ib_dev->ibd_bd != NULL) blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); - if (ib_dev->ibd_bio_set != NULL) - bioset_free(ib_dev->ibd_bio_set); + bioset_exit(&ib_dev->ibd_bio_set); } static unsigned long long iblock_emulate_read_cap_with_block_size( @@ -332,7 +330,7 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op, if (sg_num > BIO_MAX_PAGES) sg_num = BIO_MAX_PAGES; - bio = bio_alloc_bioset(GFP_NOIO, sg_num, ib_dev->ibd_bio_set); + bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set); if (!bio) { pr_err("Unable to allocate memory for bio\n"); return NULL; diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h index b4aeb2584ad4..9cc3843404d4 100644 --- a/drivers/target/target_core_iblock.h +++ b/drivers/target/target_core_iblock.h @@ -22,7 +22,7 @@ struct iblock_dev { struct se_device dev; unsigned char ibd_udev_path[SE_UDEV_PATH_LEN]; u32 ibd_flags; - struct bio_set *ibd_bio_set; + struct bio_set ibd_bio_set; struct block_device *ibd_bd; bool ibd_readonly; } ____cacheline_aligned; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 6cb933ecc084..668934ea74cb 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -986,8 +986,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, cmd->data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, - GFP_KERNEL); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); if (IS_ERR(req)) { pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4ad89ea71a70..4f26bdc3d1dc 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2121,6 +2121,8 @@ static ssize_t tcmu_qfull_time_out_store(struct config_item *item, if (val >= 0) { udev->qfull_time_out = val * MSEC_PER_SEC; + } else if (val == -1) { + udev->qfull_time_out = val; } else { printk(KERN_ERR "Invalid qfull timeout value %d\n", val); return -EINVAL; diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 2d2ceda9aa26..500911f16498 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -1255,7 +1255,7 @@ static int icm_ar_get_boot_acl(struct tb *tb, uuid_t *uuids, size_t nuuids) /* Map empty entries to null UUID */ uuid[0] = 0; uuid[1] = 0; - } else { + } else if (uuid[0] != 0 || uuid[1] != 0) { /* Upper two DWs are always one's */ uuid[2] = 0xffffffff; uuid[3] = 0xffffffff; diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c index 32d7ce430b02..34dead614149 100644 --- a/drivers/tty/amiserial.c +++ b/drivers/tty/amiserial.c @@ -1566,19 +1566,6 @@ static int rs_proc_show(struct seq_file *m, void *v) return 0; } -static int rs_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rs_proc_show, NULL); -} - -static const struct file_operations rs_proc_fops = { - .owner = THIS_MODULE, - .open = rs_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * --------------------------------------------------------------------- * rs_init() and friends @@ -1620,7 +1607,7 @@ static const struct tty_operations serial_ops = { .tiocmget = rs_tiocmget, .tiocmset = rs_tiocmset, .get_icount = rs_get_icount, - .proc_fops = &rs_proc_fops, + .proc_show = rs_proc_show, }; static int amiga_carrier_raised(struct tty_port *port) diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index cf0bde3bb927..6d3c58051ce3 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -3972,19 +3972,6 @@ static int cyclades_proc_show(struct seq_file *m, void *v) return 0; } -static int cyclades_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cyclades_proc_show, NULL); -} - -static const struct file_operations cyclades_proc_fops = { - .owner = THIS_MODULE, - .open = cyclades_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* The serial driver boot-time initialization code! Hardware I/O ports are mapped to character special devices on a first found, first allocated manner. That is, this code searches @@ -4024,7 +4011,7 @@ static const struct tty_operations cy_ops = { .tiocmget = cy_tiocmget, .tiocmset = cy_tiocmset, .get_icount = cy_get_icount, - .proc_fops = &cyclades_proc_fops, + .proc_show = cyclades_proc_show, }; static int __init cy_init(void) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0466f9f08a91..6ff9405954a6 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1829,19 +1829,6 @@ static int uart_proc_show(struct seq_file *m, void *v) uart_line_info(m, drv, i); return 0; } - -static int uart_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, uart_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations uart_proc_fops = { - .owner = THIS_MODULE, - .open = uart_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(CONFIG_CONSOLE_POLL) @@ -2415,7 +2402,7 @@ static const struct tty_operations uart_ops = { .break_ctl = uart_break_ctl, .wait_until_sent= uart_wait_until_sent, #ifdef CONFIG_PROC_FS - .proc_fops = &uart_proc_fops, + .proc_show = uart_proc_show, #endif .tiocmget = uart_tiocmget, .tiocmset = uart_tiocmset, diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index 3c4ad71f261d..fbdf4d01c6a9 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -3534,19 +3534,6 @@ static int mgsl_proc_show(struct seq_file *m, void *v) return 0; } -static int mgsl_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, mgsl_proc_show, NULL); -} - -static const struct file_operations mgsl_proc_fops = { - .owner = THIS_MODULE, - .open = mgsl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* mgsl_allocate_dma_buffers() * * Allocate and format DMA buffers (ISA adapter) @@ -4298,7 +4285,7 @@ static const struct tty_operations mgsl_ops = { .tiocmget = tiocmget, .tiocmset = tiocmset, .get_icount = msgl_get_icount, - .proc_fops = &mgsl_proc_fops, + .proc_show = mgsl_proc_show, }; /* diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 255c49687877..a94086597ebd 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1316,19 +1316,6 @@ static int synclink_gt_proc_show(struct seq_file *m, void *v) return 0; } -static int synclink_gt_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, synclink_gt_proc_show, NULL); -} - -static const struct file_operations synclink_gt_proc_fops = { - .owner = THIS_MODULE, - .open = synclink_gt_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * return count of bytes in transmit buffer */ @@ -3721,7 +3708,7 @@ static const struct tty_operations ops = { .tiocmget = tiocmget, .tiocmset = tiocmset, .get_icount = get_icount, - .proc_fops = &synclink_gt_proc_fops, + .proc_show = synclink_gt_proc_show, }; static void slgt_cleanup(void) diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index 75f11ce1f0a1..1e4d5b9c981a 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1421,19 +1421,6 @@ static int synclinkmp_proc_show(struct seq_file *m, void *v) return 0; } -static int synclinkmp_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, synclinkmp_proc_show, NULL); -} - -static const struct file_operations synclinkmp_proc_fops = { - .owner = THIS_MODULE, - .open = synclinkmp_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* Return the count of bytes in transmit buffer */ static int chars_in_buffer(struct tty_struct *tty) @@ -3899,7 +3886,7 @@ static const struct tty_operations ops = { .tiocmget = tiocmget, .tiocmset = tiocmset, .get_icount = get_icount, - .proc_fops = &synclinkmp_proc_fops, + .proc_show = synclinkmp_proc_show, }; diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index fb7329ab2b37..fc4c97cae01e 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -229,26 +229,13 @@ static int tty_ldiscs_seq_show(struct seq_file *m, void *v) return 0; } -static const struct seq_operations tty_ldiscs_seq_ops = { +const struct seq_operations tty_ldiscs_seq_ops = { .start = tty_ldiscs_seq_start, .next = tty_ldiscs_seq_next, .stop = tty_ldiscs_seq_stop, .show = tty_ldiscs_seq_show, }; -static int proc_tty_ldiscs_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tty_ldiscs_seq_ops); -} - -const struct file_operations tty_ldiscs_proc_fops = { - .owner = THIS_MODULE, - .open = proc_tty_ldiscs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /** * tty_ldisc_ref_wait - wait for the tty ldisc * @tty: tty device diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index ad743a8493be..03959dc86cfd 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -234,22 +234,10 @@ static int proc_udc_show(struct seq_file *s, void *unused) return 0; } -static int proc_udc_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_udc_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_ops = { - .owner = THIS_MODULE, - .open = proc_udc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void create_debug_file(struct at91_udc *udc) { - udc->pde = proc_create_data(debug_filename, 0, NULL, &proc_ops, udc); + udc->pde = proc_create_single_data(debug_filename, 0, NULL, + proc_udc_show, udc); } static void remove_debug_file(struct at91_udc *udc) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index 56b517a38865..7d8af299dfc7 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -2207,22 +2207,8 @@ static int fsl_proc_read(struct seq_file *m, void *v) return 0; } -/* - * seq_file wrappers for procfile show routines. - */ -static int fsl_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, fsl_proc_read, NULL); -} - -static const struct file_operations fsl_proc_fops = { - .open = fsl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -#define create_proc_file() proc_create(proc_filename, 0, NULL, &fsl_proc_fops) +#define create_proc_file() \ + proc_create_single(proc_filename, 0, NULL, fsl_proc_read) #define remove_proc_file() remove_proc_entry(proc_filename, NULL) #else /* !CONFIG_USB_GADGET_DEBUG_FILES */ diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c index 4504d0b202db..c3721225b61e 100644 --- a/drivers/usb/gadget/udc/goku_udc.c +++ b/drivers/usb/gadget/udc/goku_udc.c @@ -1241,22 +1241,6 @@ done: local_irq_restore(flags); return 0; } - -/* - * seq_file wrappers for procfile show routines. - */ -static int udc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, udc_proc_read, PDE_DATA(file_inode(file))); -} - -static const struct file_operations udc_proc_fops = { - .open = udc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #endif /* CONFIG_USB_GADGET_DEBUG_FILES */ /*-------------------------------------------------------------------------*/ @@ -1826,7 +1810,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) #ifdef CONFIG_USB_GADGET_DEBUG_FILES - proc_create_data(proc_node_name, 0, NULL, &udc_proc_fops, dev); + proc_create_single_data(proc_node_name, 0, NULL, udc_proc_read, dev); #endif retval = usb_add_gadget_udc_release(&pdev->dev, &dev->gadget, diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index dc35a54bad90..3a16431da321 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2432,22 +2432,9 @@ static int proc_udc_show(struct seq_file *s, void *_) return 0; } -static int proc_udc_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_udc_show, NULL); -} - -static const struct file_operations proc_ops = { - .owner = THIS_MODULE, - .open = proc_udc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void create_proc_file(void) { - proc_create(proc_filename, 0, NULL, &proc_ops); + proc_create_single(proc_filename, 0, NULL, proc_udc_show); } static void remove_proc_file(void) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 790e0cbe3da9..268ffa6b51d2 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -476,19 +476,6 @@ static int serial_proc_show(struct seq_file *m, void *v) return 0; } -static int serial_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, serial_proc_show, NULL); -} - -static const struct file_operations serial_proc_fops = { - .owner = THIS_MODULE, - .open = serial_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int serial_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; @@ -1192,7 +1179,7 @@ static const struct tty_operations serial_ops = { .get_icount = serial_get_icount, .cleanup = serial_cleanup, .install = serial_install, - .proc_fops = &serial_proc_fops, + .proc_show = serial_proc_show, }; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5c212bf29640..3c082451ab1a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -404,6 +404,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, { unsigned long pfn = 0; long ret, pinned = 0, lock_acct = 0; + bool rsvd; dma_addr_t iova = vaddr - dma->vaddr + dma->iova; /* This code path is only user initiated */ @@ -414,23 +415,14 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, if (ret) return ret; - if (is_invalid_reserved_pfn(*pfn_base)) { - struct vm_area_struct *vma; - - down_read(¤t->mm->mmap_sem); - vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); - pinned = min_t(long, npage, vma_pages(vma)); - up_read(¤t->mm->mmap_sem); - return pinned; - } - pinned++; + rsvd = is_invalid_reserved_pfn(*pfn_base); /* * Reserved pages aren't counted against the user, externally pinned * pages are already counted against the user. */ - if (!vfio_find_vpfn(dma, iova)) { + if (!rsvd && !vfio_find_vpfn(dma, iova)) { if (!lock_cap && current->mm->locked_vm + 1 > limit) { put_pfn(*pfn_base, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, @@ -450,12 +442,13 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, if (ret) break; - if (pfn != *pfn_base + pinned) { + if (pfn != *pfn_base + pinned || + rsvd != is_invalid_reserved_pfn(pfn)) { put_pfn(pfn, dma->prot); break; } - if (!vfio_find_vpfn(dma, iova)) { + if (!rsvd && !vfio_find_vpfn(dma, iova)) { if (!lock_cap && current->mm->locked_vm + lock_acct + 1 > limit) { put_pfn(pfn, dma->prot); @@ -473,8 +466,10 @@ out: unpin_out: if (ret) { - for (pfn = *pfn_base ; pinned ; pfn++, pinned--) - put_pfn(pfn, dma->prot); + if (!rsvd) { + for (pfn = *pfn_base ; pinned ; pfn++, pinned--) + put_pfn(pfn, dma->prot); + } return ret; } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 986058a57917..eeaf6739215f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -105,7 +105,9 @@ struct vhost_net_virtqueue { /* vhost zerocopy support fields below: */ /* last used idx for outstanding DMA zerocopy buffers */ int upend_idx; - /* first used idx for DMA done zerocopy buffers */ + /* For TX, first used idx for DMA done zerocopy buffers + * For RX, number of batched heads + */ int done_idx; /* an array of userspace buffers info */ struct ubuf_info *ubuf_info; @@ -626,6 +628,18 @@ static int sk_has_rx_data(struct sock *sk) return skb_queue_empty(&sk->sk_receive_queue); } +static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq) +{ + struct vhost_virtqueue *vq = &nvq->vq; + struct vhost_dev *dev = vq->dev; + + if (!nvq->done_idx) + return; + + vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); + nvq->done_idx = 0; +} + static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) { struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX]; @@ -635,6 +649,8 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) int len = peek_head_len(rvq, sk); if (!len && vq->busyloop_timeout) { + /* Flush batched heads first */ + vhost_rx_signal_used(rvq); /* Both tx vq and rx socket were polled here */ mutex_lock_nested(&vq->mutex, 1); vhost_disable_notify(&net->dev, vq); @@ -762,7 +778,7 @@ static void handle_rx(struct vhost_net *net) }; size_t total_len = 0; int err, mergeable; - s16 headcount, nheads = 0; + s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; @@ -790,8 +806,8 @@ static void handle_rx(struct vhost_net *net) while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; - headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len, - &in, vq_log, &log, + headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, + vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) @@ -862,12 +878,9 @@ static void handle_rx(struct vhost_net *net) vhost_discard_vq_desc(vq, headcount); goto out; } - nheads += headcount; - if (nheads > VHOST_RX_BATCH) { - vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, - nheads); - nheads = 0; - } + nvq->done_idx += headcount; + if (nvq->done_idx > VHOST_RX_BATCH) + vhost_rx_signal_used(nvq); if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; @@ -878,9 +891,7 @@ static void handle_rx(struct vhost_net *net) } vhost_net_enable_vq(net, vq); out: - if (nheads) - vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, - nheads); + vhost_rx_signal_used(nvq); mutex_unlock(&vq->mutex); } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f3bd8e941224..f0be5f35ab28 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -981,6 +981,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, { int ret = 0; + mutex_lock(&dev->mutex); vhost_dev_lock_vqs(dev); switch (msg->type) { case VHOST_IOTLB_UPDATE: @@ -1016,6 +1017,8 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, } vhost_dev_unlock_vqs(dev); + mutex_unlock(&dev->mutex); + return ret; } ssize_t vhost_chr_write_iter(struct vhost_dev *dev, diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index f741ba8df01b..924d0730ffe2 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -713,19 +713,6 @@ static const struct seq_operations proc_fb_seq_ops = { .show = fb_seq_show, }; -static int proc_fb_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_fb_seq_ops); -} - -static const struct file_operations fb_proc_fops = { - .owner = THIS_MODULE, - .open = proc_fb_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * We hold a reference to the fb_info in file->private_data, * but if the current registered fb has changed, we don't @@ -1877,7 +1864,7 @@ fbmem_init(void) { int ret; - if (!proc_create("fb", 0, NULL, &fb_proc_fops)) + if (!proc_create_seq("fb", 0, NULL, &proc_fb_seq_ops)) return -ENOMEM; ret = register_chrdev(FB_MAJOR, "fb", &fb_fops); diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c index badee04ef496..9b45125988fb 100644 --- a/drivers/video/fbdev/via/viafbdev.c +++ b/drivers/video/fbdev/via/viafbdev.c @@ -1475,19 +1475,6 @@ static int viafb_sup_odev_proc_show(struct seq_file *m, void *v) return 0; } -static int viafb_sup_odev_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, viafb_sup_odev_proc_show, NULL); -} - -static const struct file_operations viafb_sup_odev_proc_fops = { - .owner = THIS_MODULE, - .open = viafb_sup_odev_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static ssize_t odev_update(const char __user *buffer, size_t count, u32 *odev) { char buf[64], *ptr = buf; @@ -1616,8 +1603,8 @@ static void viafb_init_proc(struct viafb_shared *shared) &viafb_vt1636_proc_fops); #endif /* CONFIG_FB_VIA_DIRECT_PROCFS */ - proc_create("supported_output_devices", 0, viafb_entry, - &viafb_sup_odev_proc_fops); + proc_create_single("supported_output_devices", 0, viafb_entry, + viafb_sup_odev_proc_show); iga1_entry = proc_mkdir("iga1", viafb_entry); shared->iga1_proc_entry = iga1_entry; proc_create("output_devices", 0, iga1_entry, diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index e1c60899fdbc..a6f9ba85dc4b 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -351,7 +351,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, * physical address */ phys = xen_bus_to_phys(dev_addr); - if (((dev_addr + size - 1 > dma_mask)) || + if (((dev_addr + size - 1 <= dma_mask)) || range_straddles_page_boundary(phys, size)) xen_destroy_contiguous_region(phys, order); diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index df05a26ab8d8..2e4ca4dc0960 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -96,19 +96,6 @@ static const struct seq_operations zorro_devices_seq_ops = { .show = zorro_seq_show, }; -static int zorro_devices_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &zorro_devices_seq_ops); -} - -static const struct file_operations zorro_devices_proc_fops = { - .owner = THIS_MODULE, - .open = zorro_devices_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static struct proc_dir_entry *proc_bus_zorro_dir; static int __init zorro_proc_attach_device(unsigned int slot) @@ -132,8 +119,8 @@ static int __init zorro_proc_init(void) if (MACH_IS_AMIGA && AMIGAHW_PRESENT(ZORRO)) { proc_bus_zorro_dir = proc_mkdir("bus/zorro", NULL); - proc_create("devices", 0, proc_bus_zorro_dir, - &zorro_devices_proc_fops); + proc_create_seq("devices", 0, proc_bus_zorro_dir, + &zorro_devices_seq_ops); for (slot = 0; slot < zorro_num_autocon; slot++) zorro_proc_attach_device(slot); } diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d8aa0ae3d037..41c5749f4db7 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) struct super_block *sb = dir->i_sb; struct buffer_head *bh; struct inode *inode = NULL; + struct dentry *res; pr_debug("%s(\"%pd\")\n", __func__, dentry); affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); - affs_unlock_dir(dir); - if (IS_ERR(bh)) + if (IS_ERR(bh)) { + affs_unlock_dir(dir); return ERR_CAST(bh); + } if (bh) { u32 ino = bh->b_blocknr; @@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) } affs_brelse(bh); inode = affs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); } - d_add(dentry, inode); - return NULL; + res = d_splice_alias(inode, dentry); + if (!IS_ERR_OR_NULL(res)) + res->d_fsdata = dentry->d_fsdata; + affs_unlock_dir(dir); + return res; } int diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 839a22280606..3aad32762989 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -62,7 +62,6 @@ static const struct file_operations afs_proc_rootcell_fops = { .llseek = no_llseek, }; -static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos); static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *pos); @@ -76,15 +75,6 @@ static const struct seq_operations afs_proc_cell_volumes_ops = { .show = afs_proc_cell_volumes_show, }; -static const struct file_operations afs_proc_cell_volumes_fops = { - .open = afs_proc_cell_volumes_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int afs_proc_cell_vlservers_open(struct inode *inode, - struct file *file); static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos); static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *pos); @@ -98,14 +88,6 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = { .show = afs_proc_cell_vlservers_show, }; -static const struct file_operations afs_proc_cell_vlservers_fops = { - .open = afs_proc_cell_vlservers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int afs_proc_servers_open(struct inode *inode, struct file *file); static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos); static void *afs_proc_servers_next(struct seq_file *p, void *v, loff_t *pos); @@ -119,13 +101,6 @@ static const struct seq_operations afs_proc_servers_ops = { .show = afs_proc_servers_show, }; -static const struct file_operations afs_proc_servers_fops = { - .open = afs_proc_servers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int afs_proc_sysname_open(struct inode *inode, struct file *file); static int afs_proc_sysname_release(struct inode *inode, struct file *file); static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos); @@ -152,7 +127,7 @@ static const struct file_operations afs_proc_sysname_fops = { .write = afs_proc_sysname_write, }; -static const struct file_operations afs_proc_stats_fops; +static int afs_proc_stats_show(struct seq_file *m, void *v); /* * initialise the /proc/fs/afs/ directory @@ -167,8 +142,8 @@ int afs_proc_init(struct afs_net *net) if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || - !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) || - !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) || + !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) || + !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) || !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops)) goto error_tree; @@ -196,16 +171,7 @@ void afs_proc_cleanup(struct afs_net *net) */ static int afs_proc_cells_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - - ret = seq_open(file, &afs_proc_cells_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = PDE_DATA(inode); - return 0; + return seq_open(file, &afs_proc_cells_ops); } /* @@ -430,10 +396,11 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) if (!dir) goto error_dir; - if (!proc_create_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_fops, cell) || - !proc_create_data("volumes", 0, dir, - &afs_proc_cell_volumes_fops, cell)) + if (!proc_create_seq_data("vlservers", 0, dir, + &afs_proc_cell_vlservers_ops, cell)) + goto error_tree; + if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops, + cell)) goto error_tree; _leave(" = 0"); @@ -459,36 +426,13 @@ void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell) } /* - * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells - */ -static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file) -{ - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_volumes_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = cell; - - return 0; -} - -/* * set up the iterator to start reading from the cells list and return the * first item */ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) __acquires(cell->proc_lock) { - struct afs_cell *cell = m->private; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); _enter("cell=%p pos=%Ld", cell, *_pos); @@ -502,7 +446,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *_pos) { - struct afs_cell *cell = p->private; + struct afs_cell *cell = PDE_DATA(file_inode(p->file)); _enter("cell=%p pos=%Ld", cell, *_pos); return seq_list_next(v, &cell->proc_volumes, _pos); @@ -514,7 +458,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) __releases(cell->proc_lock) { - struct afs_cell *cell = p->private; + struct afs_cell *cell = PDE_DATA(file_inode(p->file)); read_unlock(&cell->proc_lock); } @@ -530,7 +474,7 @@ static const char afs_vol_types[3][3] = { */ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { - struct afs_cell *cell = m->private; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); /* Display header on line 1 */ @@ -547,30 +491,6 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume - * location server - */ -static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) -{ - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_vlservers_ops); - if (ret<0) - return ret; - - m = file->private_data; - m->private = cell; - - return 0; -} - -/* * set up the iterator to start reading from the cells list and return the * first item */ @@ -578,7 +498,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { struct afs_addr_list *alist; - struct afs_cell *cell = m->private; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); @@ -603,7 +523,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *_pos) { struct afs_addr_list *alist; - struct afs_cell *cell = p->private; + struct afs_cell *cell = PDE_DATA(file_inode(p->file)); loff_t pos; alist = rcu_dereference(cell->vl_addrs); @@ -644,15 +564,6 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/afs/servers" which provides a summary of active - * servers - */ -static int afs_proc_servers_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &afs_proc_servers_ops); -} - -/* * Set up the iterator to start reading from the server list and return the * first item. */ @@ -931,18 +842,3 @@ static int afs_proc_stats_show(struct seq_file *m, void *v) atomic_long_read(&net->n_store_bytes)); return 0; } - -/* - * Open "/proc/fs/afs/stats" to allow reading of the stat counters. - */ -static int afs_proc_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, afs_proc_stats_show, NULL); -} - -static const struct file_operations afs_proc_stats_fops = { - .open = afs_proc_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; diff --git a/fs/afs/security.c b/fs/afs/security.c index 1992b0ffa543..81dfedb7879f 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -372,18 +372,14 @@ int afs_permission(struct inode *inode, int mask) mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file"); if (S_ISDIR(inode->i_mode)) { - if (mask & MAY_EXEC) { + if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; - } else if (mask & MAY_READ) { - if (!(access & AFS_ACE_LOOKUP)) - goto permission_denied; - } else if (mask & MAY_WRITE) { + } + if (mask & MAY_WRITE) { if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */ AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */ goto permission_denied; - } else { - BUG(); } } else { if (!(access & AFS_ACE_LOOKUP)) diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 1ed7e2fd2f35..c3b740813fc7 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uvldbentry__xdr *uvldb; struct afs_vldb_entry *entry; bool new_only = false; - u32 tmp, nr_servers; + u32 tmp, nr_servers, vlflags; int i, ret; _enter(""); @@ -55,6 +55,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) new_only = true; } + vlflags = ntohl(uvldb->flags); for (i = 0; i < nr_servers; i++) { struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; @@ -64,12 +65,13 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) if (tmp & AFS_VLSF_DONTUSE || (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) continue; - if (tmp & AFS_VLSF_RWVOL) + if (tmp & AFS_VLSF_RWVOL) { entry->fs_mask[i] |= AFS_VOL_VTM_RW; + if (vlflags & AFS_VLF_BACKEXISTS) + entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + } if (tmp & AFS_VLSF_ROVOL) entry->fs_mask[i] |= AFS_VOL_VTM_RO; - if (tmp & AFS_VLSF_BACKVOL) - entry->fs_mask[i] |= AFS_VOL_VTM_BAK; if (!entry->fs_mask[i]) continue; @@ -89,15 +91,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) for (i = 0; i < AFS_MAXTYPES; i++) entry->vid[i] = ntohl(uvldb->volumeId[i]); - tmp = ntohl(uvldb->flags); - if (tmp & AFS_VLF_RWEXISTS) + if (vlflags & AFS_VLF_RWEXISTS) __set_bit(AFS_VLDB_HAS_RW, &entry->flags); - if (tmp & AFS_VLF_ROEXISTS) + if (vlflags & AFS_VLF_ROEXISTS) __set_bit(AFS_VLDB_HAS_RO, &entry->flags); - if (tmp & AFS_VLF_BACKEXISTS) + if (vlflags & AFS_VLF_BACKEXISTS) __set_bit(AFS_VLDB_HAS_BAK, &entry->flags); - if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { entry->error = -ENOMEDIUM; __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); } @@ -634,9 +634,8 @@ static void free_ioctx_users(struct percpu_ref *ref) while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, struct aio_kiocb, ki_list); - - list_del_init(&req->ki_list); kiocb_cancel(req); + list_del_init(&req->ki_list); } spin_unlock_irq(&ctx->ctx_lock); @@ -1078,8 +1077,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { - percpu_ref_get(&ctx->users); - ret = ctx; + if (percpu_ref_tryget_live(&ctx->users)) + ret = ctx; } out: rcu_read_unlock(); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index af2832aaeec5..4700b4534439 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) if (ret == BEFS_BT_NOT_FOUND) { befs_debug(sb, "<--- %s %pd not found", __func__, dentry); - d_add(dentry, NULL); - return ERR_PTR(-ENOENT); - + inode = NULL; } else if (ret != BEFS_OK || offset == 0) { befs_error(sb, "<--- %s Error", __func__); - return ERR_PTR(-ENODATA); + inode = ERR_PTR(-ENODATA); + } else { + inode = befs_iget(dir->i_sb, (ino_t) offset); } - - inode = befs_iget(dir->i_sb, (ino_t) offset); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - d_add(dentry, inode); - befs_debug(sb, "<--- %s", __func__); - return NULL; + return d_splice_alias(inode, dentry); } static int diff --git a/fs/block_dev.c b/fs/block_dev.c index 7ec920e27065..bef6934b6189 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -272,7 +272,7 @@ struct blkdev_dio { struct bio bio; }; -static struct bio_set *blkdev_dio_pool __read_mostly; +static struct bio_set blkdev_dio_pool; static void blkdev_bio_end_io(struct bio *bio) { @@ -334,7 +334,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool); + bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); bio_get(bio); /* extra ref for the completion handler */ dio = container_of(bio, struct blkdev_dio, bio); @@ -432,10 +432,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static __init int blkdev_init(void) { - blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); - if (!blkdev_dio_pool) - return -ENOMEM; - return 0; + return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); } module_init(blkdev_init); @@ -1322,27 +1319,30 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) * check_disk_size_change - checks for disk size change and adjusts bdev size. * @disk: struct gendisk to check * @bdev: struct bdev to adjust. + * @verbose: if %true log a message about a size change if there is any * * This routine checks to see if the bdev size does not match the disk size * and adjusts it if it differs. When shrinking the bdev size, its all caches * are freed. */ -void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) +void check_disk_size_change(struct gendisk *disk, struct block_device *bdev, + bool verbose) { loff_t disk_size, bdev_size; disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { - printk(KERN_INFO - "%s: detected capacity change from %lld to %lld\n", - disk->disk_name, bdev_size, disk_size); + if (verbose) { + printk(KERN_INFO + "%s: detected capacity change from %lld to %lld\n", + disk->disk_name, bdev_size, disk_size); + } i_size_write(bdev->bd_inode, disk_size); if (bdev_size > disk_size) flush_disk(bdev, false); } } -EXPORT_SYMBOL(check_disk_size_change); /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back @@ -1364,7 +1364,7 @@ int revalidate_disk(struct gendisk *disk) return ret; mutex_lock(&bdev->bd_mutex); - check_disk_size_change(disk, bdev); + check_disk_size_change(disk, bdev, ret == 0); bdev->bd_invalidated = 0; mutex_unlock(&bdev->bd_mutex); bdput(bdev); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e99b329002cf..56d32bb462f9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -26,7 +26,7 @@ static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; -static struct bio_set *btrfs_bioset; +static struct bio_set btrfs_bioset; static inline bool extent_state_in_tree(const struct extent_state *state) { @@ -162,20 +162,18 @@ int __init extent_io_init(void) if (!extent_buffer_cache) goto free_state_cache; - btrfs_bioset = bioset_create(BIO_POOL_SIZE, - offsetof(struct btrfs_io_bio, bio), - BIOSET_NEED_BVECS); - if (!btrfs_bioset) + if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE, + offsetof(struct btrfs_io_bio, bio), + BIOSET_NEED_BVECS)) goto free_buffer_cache; - if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE)) + if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE)) goto free_bioset; return 0; free_bioset: - bioset_free(btrfs_bioset); - btrfs_bioset = NULL; + bioset_exit(&btrfs_bioset); free_buffer_cache: kmem_cache_destroy(extent_buffer_cache); @@ -198,8 +196,7 @@ void __cold extent_io_exit(void) rcu_barrier(); kmem_cache_destroy(extent_state_cache); kmem_cache_destroy(extent_buffer_cache); - if (btrfs_bioset) - bioset_free(btrfs_bioset); + bioset_exit(&btrfs_bioset); } void extent_io_tree_init(struct extent_io_tree *tree, @@ -2679,7 +2676,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) { struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); @@ -2692,7 +2689,7 @@ struct bio *btrfs_bio_clone(struct bio *bio) struct bio *new; /* Bio allocation backed by a bioset does not fail */ - new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset); + new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset); btrfs_bio = btrfs_io_bio(new); btrfs_io_bio_init(btrfs_bio); btrfs_bio->iter = bio->bi_iter; @@ -2704,7 +2701,7 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) struct bio *bio; /* Bio allocation backed by a bioset does not fail */ - bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset); btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; } @@ -2715,7 +2712,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size) struct btrfs_io_bio *btrfs_bio; /* this will never fail when it's backed by a bioset */ - bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset); + bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset); ASSERT(bio); btrfs_bio = btrfs_io_bio(bio); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e604e7071f1..0b86cf10cf2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6586,8 +6586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; } else { btrfs_update_inode(trans, root, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); } out_unlock: @@ -6663,8 +6662,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); @@ -6809,12 +6807,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - d_instantiate(dentry, inode); - /* - * mkdir is special. We're unlocking after we call d_instantiate - * to avoid a race with nfsd calling d_instantiate. - */ - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); drop_on_err = 0; out_fail: @@ -9124,7 +9117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) BTRFS_EXTENT_DATA_KEY); trans->block_rsv = &fs_info->trans_block_rsv; if (ret != -ENOSPC && ret != -EAGAIN) { - err = ret; + if (ret < 0) + err = ret; break; } @@ -10257,8 +10251,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 0daa1e3fe0df..ab0bbe93b398 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -572,6 +572,11 @@ lookup_again: if (ret < 0) goto create_error; + if (unlikely(d_unhashed(next))) { + dput(next); + inode_unlock(d_inode(dir)); + goto lookup_again; + } ASSERT(d_backing_inode(next)); _debug("mkdir -> %p{%p{ino=%lu}}", @@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* search the current directory for the element name */ inode_lock(d_inode(dir)); +retry: start = jiffies; subdir = lookup_one_len(dirname, dir, strlen(dirname)); cachefiles_hist(cachefiles_lookup_histogram, start); @@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, if (ret < 0) goto mkdir_error; + if (unlikely(d_unhashed(subdir))) { + dput(subdir); + goto retry; + } ASSERT(d_backing_inode(subdir)); _debug("mkdir -> %p{%p{ino=%lu}}", diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c index 125b90f6c796..0ce1aa56b67f 100644 --- a/fs/cachefiles/proc.c +++ b/fs/cachefiles/proc.c @@ -85,21 +85,6 @@ static const struct seq_operations cachefiles_histogram_ops = { }; /* - * open "/proc/fs/cachefiles/XXX" which provide statistics summaries - */ -static int cachefiles_histogram_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &cachefiles_histogram_ops); -} - -static const struct file_operations cachefiles_histogram_fops = { - .open = cachefiles_histogram_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -/* * initialise the /proc/fs/cachefiles/ directory */ int __init cachefiles_proc_init(void) @@ -109,8 +94,8 @@ int __init cachefiles_proc_init(void) if (!proc_mkdir("fs/cachefiles", NULL)) goto error_dir; - if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, - &cachefiles_histogram_fops)) + if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL, + &cachefiles_histogram_ops)) goto error_histogram; _leave(" = 0"); diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 9d69ea433330..4bc4a7ac61d9 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -314,18 +314,6 @@ skip_rdma: return 0; } -static int cifs_debug_data_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cifs_debug_data_proc_show, NULL); -} - -static const struct file_operations cifs_debug_data_proc_fops = { - .open = cifs_debug_data_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #ifdef CONFIG_CIFS_STATS static ssize_t cifs_stats_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) @@ -497,7 +485,8 @@ cifs_proc_init(void) if (proc_fs_cifs == NULL) return; - proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops); + proc_create_single("DebugData", 0, proc_fs_cifs, + cifs_debug_data_proc_show); #ifdef CONFIG_CIFS_STATS proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 017b0ab19bc4..124b093d14e5 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); - if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) { + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) { if (sbi && sbi->mtd_point_size) mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); kill_mtd_super(sb); diff --git a/fs/dcache.c b/fs/dcache.c index 86d2de63461e..0e8e5de3c48a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -580,6 +580,7 @@ static void __dentry_kill(struct dentry *dentry) spin_unlock(&dentry->d_lock); if (likely(can_free)) dentry_free(dentry); + cond_resched(); } static struct dentry *__lock_parent(struct dentry *dentry) @@ -827,30 +828,24 @@ static inline bool fast_dput(struct dentry *dentry) */ void dput(struct dentry *dentry) { - if (unlikely(!dentry)) - return; + while (dentry) { + might_sleep(); -repeat: - might_sleep(); + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); + return; + } - rcu_read_lock(); - if (likely(fast_dput(dentry))) { + /* Slow case: now with the dentry lock held */ rcu_read_unlock(); - return; - } - - /* Slow case: now with the dentry lock held */ - rcu_read_unlock(); - if (likely(retain_dentry(dentry))) { - spin_unlock(&dentry->d_lock); - return; - } + if (likely(retain_dentry(dentry))) { + spin_unlock(&dentry->d_lock); + return; + } - dentry = dentry_kill(dentry); - if (dentry) { - cond_resched(); - goto repeat; + dentry = dentry_kill(dentry); } } EXPORT_SYMBOL(dput); @@ -907,6 +902,35 @@ repeat: } EXPORT_SYMBOL(dget_parent); +static struct dentry * __d_find_any_alias(struct inode *inode) +{ + struct dentry *alias; + + if (hlist_empty(&inode->i_dentry)) + return NULL; + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); + __dget(alias); + return alias; +} + +/** + * d_find_any_alias - find any alias for a given inode + * @inode: inode to find an alias for + * + * If any aliases exist for the given inode, take and return a + * reference for one of them. If no aliases exist, return %NULL. + */ +struct dentry *d_find_any_alias(struct inode *inode) +{ + struct dentry *de; + + spin_lock(&inode->i_lock); + de = __d_find_any_alias(inode); + spin_unlock(&inode->i_lock); + return de; +} +EXPORT_SYMBOL(d_find_any_alias); + /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -923,34 +947,19 @@ EXPORT_SYMBOL(dget_parent); */ static struct dentry *__d_find_alias(struct inode *inode) { - struct dentry *alias, *discon_alias; + struct dentry *alias; + + if (S_ISDIR(inode->i_mode)) + return __d_find_any_alias(inode); -again: - discon_alias = NULL; hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); - if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { - if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) { - discon_alias = alias; - } else { - __dget_dlock(alias); - spin_unlock(&alias->d_lock); - return alias; - } - } - spin_unlock(&alias->d_lock); - } - if (discon_alias) { - alias = discon_alias; - spin_lock(&alias->d_lock); - if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (!d_unhashed(alias)) { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } spin_unlock(&alias->d_lock); - goto again; } return NULL; } @@ -1052,8 +1061,6 @@ static void shrink_dentry_list(struct list_head *list) while (!list_empty(list)) { struct dentry *dentry, *parent; - cond_resched(); - dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); @@ -1230,13 +1237,11 @@ enum d_walk_ret { * @parent: start of walk * @data: data passed to @enter() and @finish() * @enter: callback when first entering the dentry - * @finish: callback when successfully finished the walk * - * The @enter() and @finish() callbacks are called with d_lock held. + * The @enter() callbacks are called with d_lock held. */ static void d_walk(struct dentry *parent, void *data, - enum d_walk_ret (*enter)(void *, struct dentry *), - void (*finish)(void *)) + enum d_walk_ret (*enter)(void *, struct dentry *)) { struct dentry *this_parent; struct list_head *next; @@ -1325,8 +1330,6 @@ ascend: if (need_seqretry(&rename_lock, seq)) goto rename_retry; rcu_read_unlock(); - if (finish) - finish(data); out_unlock: spin_unlock(&this_parent->d_lock); @@ -1375,7 +1378,7 @@ int path_has_submounts(const struct path *parent) struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; read_seqlock_excl(&mount_lock); - d_walk(parent->dentry, &data, path_check_mount, NULL); + d_walk(parent->dentry, &data, path_check_mount); read_sequnlock_excl(&mount_lock); return data.mounted; @@ -1483,11 +1486,16 @@ void shrink_dcache_parent(struct dentry *parent) data.start = parent; data.found = 0; - d_walk(parent, &data, select_collect, NULL); + d_walk(parent, &data, select_collect); + + if (!list_empty(&data.dispose)) { + shrink_dentry_list(&data.dispose); + continue; + } + + cond_resched(); if (!data.found) break; - - shrink_dentry_list(&data.dispose); } } EXPORT_SYMBOL(shrink_dcache_parent); @@ -1518,7 +1526,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) static void do_one_tree(struct dentry *dentry) { shrink_dcache_parent(dentry); - d_walk(dentry, dentry, umount_check, NULL); + d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); } @@ -1542,78 +1550,48 @@ void shrink_dcache_for_umount(struct super_block *sb) } } -struct detach_data { - struct select_data select; - struct dentry *mountpoint; -}; -static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry) +static enum d_walk_ret find_submount(void *_data, struct dentry *dentry) { - struct detach_data *data = _data; - + struct dentry **victim = _data; if (d_mountpoint(dentry)) { __dget_dlock(dentry); - data->mountpoint = dentry; + *victim = dentry; return D_WALK_QUIT; } - - return select_collect(&data->select, dentry); -} - -static void check_and_drop(void *_data) -{ - struct detach_data *data = _data; - - if (!data->mountpoint && list_empty(&data->select.dispose)) - __d_drop(data->select.start); + return D_WALK_CONTINUE; } /** * d_invalidate - detach submounts, prune dcache, and drop * @dentry: dentry to invalidate (aka detach, prune and drop) - * - * no dcache lock. - * - * The final d_drop is done as an atomic operation relative to - * rename_lock ensuring there are no races with d_set_mounted. This - * ensures there are no unhashed dentries on the path to a mountpoint. */ void d_invalidate(struct dentry *dentry) { - /* - * If it's already been dropped, return OK. - */ + bool had_submounts = false; spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); return; } + __d_drop(dentry); spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ - if (!dentry->d_inode) { - d_drop(dentry); + if (!dentry->d_inode) return; - } + shrink_dcache_parent(dentry); for (;;) { - struct detach_data data; - - data.mountpoint = NULL; - INIT_LIST_HEAD(&data.select.dispose); - data.select.start = dentry; - data.select.found = 0; - - d_walk(dentry, &data, detach_and_collect, check_and_drop); - - if (!list_empty(&data.select.dispose)) - shrink_dentry_list(&data.select.dispose); - else if (!data.mountpoint) + struct dentry *victim = NULL; + d_walk(dentry, &victim, find_submount); + if (!victim) { + if (had_submounts) + shrink_dcache_parent(dentry); return; - - if (data.mountpoint) { - detach_mounts(data.mountpoint); - dput(data.mountpoint); } + had_submounts = true; + detach_mounts(victim); + dput(victim); } } EXPORT_SYMBOL(d_invalidate); @@ -1899,6 +1877,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode) } EXPORT_SYMBOL(d_instantiate); +/* + * This should be equivalent to d_instantiate() + unlock_new_inode(), + * with lockdep-related part of unlock_new_inode() done before + * anything else. Use that instead of open-coding d_instantiate()/ + * unlock_new_inode() combinations. + */ +void d_instantiate_new(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + BUG_ON(!inode); + lockdep_annotate_inode_mutex_key(inode); + security_d_instantiate(entry, inode); + spin_lock(&inode->i_lock); + __d_instantiate(entry, inode); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; + smp_mb(); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL(d_instantiate_new); + /** * d_instantiate_no_diralias - instantiate a non-aliased dentry * @entry: dentry to complete @@ -1941,35 +1941,6 @@ struct dentry *d_make_root(struct inode *root_inode) } EXPORT_SYMBOL(d_make_root); -static struct dentry * __d_find_any_alias(struct inode *inode) -{ - struct dentry *alias; - - if (hlist_empty(&inode->i_dentry)) - return NULL; - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - __dget(alias); - return alias; -} - -/** - * d_find_any_alias - find any alias for a given inode - * @inode: inode to find an alias for - * - * If any aliases exist for the given inode, take and return a - * reference for one of them. If no aliases exist, return %NULL. - */ -struct dentry *d_find_any_alias(struct inode *inode) -{ - struct dentry *de; - - spin_lock(&inode->i_lock); - de = __d_find_any_alias(inode); - spin_unlock(&inode->i_lock); - return de; -} -EXPORT_SYMBOL(d_find_any_alias); - static struct dentry *__d_instantiate_anon(struct dentry *dentry, struct inode *inode, bool disconnected) @@ -3112,7 +3083,7 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) void d_genocide(struct dentry *parent) { - d_walk(parent, parent, d_genocide_kill, NULL); + d_walk(parent, parent, d_genocide_kill); } EXPORT_SYMBOL(d_genocide); diff --git a/fs/direct-io.c b/fs/direct-io.c index 874607bb6e02..093fb54cd316 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -432,8 +432,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, struct bio *bio; /* - * bio_alloc() is guaranteed to return a bio when called with - * __GFP_RECLAIM and we request a valid number of vectors. + * bio_alloc() is guaranteed to return a bio when allowed to sleep and + * we request a valid number of vectors. */ bio = bio_alloc(GFP_KERNEL, nr_vecs); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 97d17eaeba07..49121e5a8de2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, iget_failed(ecryptfs_inode); goto out; } - unlock_new_inode(ecryptfs_inode); - d_instantiate(ecryptfs_dentry, ecryptfs_inode); + d_instantiate_new(ecryptfs_dentry, ecryptfs_inode); out: return rc; } diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 3c6a9c156b7a..ddbf87246898 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -790,7 +790,7 @@ int ore_create(struct ore_io_state *ios) for (i = 0; i < ios->oc->numdevs; i++) { struct osd_request *or; - or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -815,7 +815,7 @@ int ore_remove(struct ore_io_state *ios) for (i = 0; i < ios->oc->numdevs; i++) { struct osd_request *or; - or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -847,7 +847,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, dev)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -966,7 +966,7 @@ int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp) return 0; /* Just an empty slot */ first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; - or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, first_dev)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; @@ -1060,7 +1060,7 @@ static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, cur_comp)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 179cd5c2f52a..719a3152da80 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -229,7 +229,7 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, u64 offset, void *p, unsigned length) { - struct osd_request *or = osd_start_request(od, GFP_KERNEL); + struct osd_request *or = osd_start_request(od); /* struct osd_sense_info osi = {.key = 0};*/ int ret; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1e01fabef130..71635909df3b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1264,21 +1264,11 @@ do_indirects: static void ext2_truncate_blocks(struct inode *inode, loff_t offset) { - /* - * XXX: it seems like a bug here that we don't allow - * IS_APPEND inode to have blocks-past-i_size trimmed off. - * review and fix this. - * - * Also would be nice to be able to handle IO errors and such, - * but that's probably too much to ask. - */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; dax_sem_down_write(EXT2_I(inode)); __ext2_truncate_blocks(inode, offset); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 55f7caadb093..152453a91877 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out: return err; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a42e71203e53..229ea4da6785 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2390,7 +2390,7 @@ extern int ext4_init_inode_table(struct super_block *sb, extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ -extern const struct file_operations ext4_seq_mb_groups_fops; +extern const struct seq_operations ext4_mb_seq_groups_ops; extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; extern int ext4_mb_init(struct super_block *); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 769a62708b1c..6884e81c1465 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2254,7 +2254,7 @@ out: static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { - struct super_block *sb = seq->private; + struct super_block *sb = PDE_DATA(file_inode(seq->file)); ext4_group_t group; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) @@ -2265,7 +2265,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = seq->private; + struct super_block *sb = PDE_DATA(file_inode(seq->file)); ext4_group_t group; ++*pos; @@ -2277,7 +2277,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { - struct super_block *sb = seq->private; + struct super_block *sb = PDE_DATA(file_inode(seq->file)); ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err, buddy_loaded = 0; @@ -2330,34 +2330,13 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) { } -static const struct seq_operations ext4_mb_seq_groups_ops = { +const struct seq_operations ext4_mb_seq_groups_ops = { .start = ext4_mb_seq_groups_start, .next = ext4_mb_seq_groups_next, .stop = ext4_mb_seq_groups_stop, .show = ext4_mb_seq_groups_show, }; -static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) -{ - struct super_block *sb = PDE_DATA(inode); - int rc; - - rc = seq_open(file, &ext4_mb_seq_groups_ops); - if (rc == 0) { - struct seq_file *m = file->private_data; - m->private = sb; - } - return rc; - -} - -const struct file_operations ext4_seq_mb_groups_fops = { - .open = ext4_mb_seq_groups_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) { int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b1f21e3a0763..4a09063ce1d2 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle, int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } drop_nlink(inode); @@ -2651,8 +2650,7 @@ out_clear_inode: err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 9ebd26c957c2..f34da0bb8f17 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -346,39 +346,9 @@ static struct kobject *ext4_root; static struct kobject *ext4_feat; -#define PROC_FILE_SHOW_DEFN(name) \ -static int name##_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \ -} \ -\ -static const struct file_operations ext4_seq_##name##_fops = { \ - .open = name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -} - -#define PROC_FILE_LIST(name) \ - { __stringify(name), &ext4_seq_##name##_fops } - -PROC_FILE_SHOW_DEFN(es_shrinker_info); -PROC_FILE_SHOW_DEFN(options); - -static const struct ext4_proc_files { - const char *name; - const struct file_operations *fops; -} proc_files[] = { - PROC_FILE_LIST(options), - PROC_FILE_LIST(es_shrinker_info), - PROC_FILE_LIST(mb_groups), - { NULL, NULL }, -}; - int ext4_register_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const struct ext4_proc_files *p; int err; init_completion(&sbi->s_kobj_unregister); @@ -392,11 +362,14 @@ int ext4_register_sysfs(struct super_block *sb) if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - if (sbi->s_proc) { - for (p = proc_files; p->name; p++) - proc_create_data(p->name, S_IRUGO, sbi->s_proc, - p->fops, sb); + proc_create_single_data("options", S_IRUGO, sbi->s_proc, + ext4_seq_options_show, sb); + proc_create_single_data("es_shrinker_info", S_IRUGO, + sbi->s_proc, ext4_seq_es_shrinker_info_show, + sb); + proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_groups_ops, sb); } return 0; } @@ -404,13 +377,9 @@ int ext4_register_sysfs(struct super_block *sb) void ext4_unregister_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const struct ext4_proc_files *p; - if (sbi->s_proc) { - for (p = proc_files; p->name; p++) - remove_proc_entry(p->name, sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } + if (sbi->s_proc) + remove_proc_subtree(sb->s_id, ext4_proc_root); kobject_del(&sbi->s_kobj); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d5098efe577c..75e37fd720b2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, err = page_symlink(inode, disk_link.name, disk_link.len); err_out: - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); /* * Let's flush symlink data in order to avoid broken symlink as much as @@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index f33a56d6e6dd..4b47ca6296a7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -572,23 +572,6 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset) return 0; } -#define F2FS_PROC_FILE_DEF(_name) \ -static int _name##_open_fs(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ -} \ - \ -static const struct file_operations f2fs_seq_##_name##_fops = { \ - .open = _name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - -F2FS_PROC_FILE_DEF(segment_info); -F2FS_PROC_FILE_DEF(segment_bits); -F2FS_PROC_FILE_DEF(iostat_info); - int __init f2fs_init_sysfs(void) { int ret; @@ -632,12 +615,12 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); if (sbi->s_proc) { - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_bits_fops, sb); - proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_iostat_info_fops, sb); + proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc, + segment_info_seq_show, sb); + proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc, + segment_bits_seq_show, sb); + proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc, + iostat_info_seq_show, sb); } return 0; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 582ca731a6c9..484ce674e0cd 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -314,10 +314,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) int err; mutex_lock(&MSDOS_SB(sb)->s_lock); - /* - * Check whether the directory is not in use, then check - * whether it is empty. - */ err = fat_dir_empty(inode); if (err) goto out; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 2649759c478a..4f4362d5a04c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -697,15 +697,6 @@ static int vfat_find(struct inode *dir, const struct qstr *qname, return fat_search_long(dir, qname->name, len, sinfo); } -/* - * (nfsd's) anonymous disconnected dentry? - * NOTE: !IS_ROOT() is not anonymous (I.e. d_splice_alias() did the job). - */ -static int vfat_d_anon_disconn(struct dentry *dentry) -{ - return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED); -} - static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -738,8 +729,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, * Checking "alias->d_parent == dentry->d_parent" to make sure * FS is not corrupted (especially double linked dir). */ - if (alias && alias->d_parent == dentry->d_parent && - !vfat_d_anon_disconn(alias)) { + if (alias && alias->d_parent == dentry->d_parent) { /* * This inode has non anonymous-DCACHE_DISCONNECTED * dentry. This means, the user did ->lookup() by an @@ -747,7 +737,6 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, * * Switch to new one for reason of locality if possible. */ - BUG_ON(d_unhashed(alias)); if (!S_ISDIR(inode->i_mode)) d_move(alias, dentry); iput(inode); diff --git a/fs/filesystems.c b/fs/filesystems.c index f2728a4a03a1..b03f57b1105b 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -238,21 +238,9 @@ static int filesystems_proc_show(struct seq_file *m, void *v) return 0; } -static int filesystems_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, filesystems_proc_show, NULL); -} - -static const struct file_operations filesystems_proc_fops = { - .open = filesystems_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_filesystems_init(void) { - proc_create("filesystems", 0, NULL, &filesystems_proc_fops); + proc_create_single("filesystems", 0, NULL, filesystems_proc_show); return 0; } module_init(proc_filesystems_init); diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c index 15a3d042247e..9a13e9e15b69 100644 --- a/fs/fscache/histogram.c +++ b/fs/fscache/histogram.c @@ -83,24 +83,9 @@ static void fscache_histogram_stop(struct seq_file *m, void *v) { } -static const struct seq_operations fscache_histogram_ops = { +const struct seq_operations fscache_histogram_ops = { .start = fscache_histogram_start, .stop = fscache_histogram_stop, .next = fscache_histogram_next, .show = fscache_histogram_show, }; - -/* - * open "/proc/fs/fscache/histogram" to provide latency data - */ -static int fscache_histogram_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &fscache_histogram_ops); -} - -const struct file_operations fscache_histogram_fops = { - .open = fscache_histogram_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 500650f938fe..f83328a7f048 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -31,6 +31,7 @@ #include <linux/fscache-cache.h> #include <trace/events/fscache.h> #include <linux/sched.h> +#include <linux/seq_file.h> #define FSCACHE_MIN_THREADS 4 #define FSCACHE_MAX_THREADS 32 @@ -84,7 +85,7 @@ static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) atomic_inc(&histogram[jif]); } -extern const struct file_operations fscache_histogram_fops; +extern const struct seq_operations fscache_histogram_ops; #else #define fscache_hist(hist, start_jif) do {} while (0) @@ -294,7 +295,7 @@ static inline void fscache_stat_d(atomic_t *stat) #define __fscache_stat(stat) (stat) -extern const struct file_operations fscache_stats_fops; +int fscache_stats_show(struct seq_file *m, void *v); #else #define __fscache_stat(stat) (NULL) diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index 1d9e4951a597..49a8c90414bc 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -26,14 +26,14 @@ int __init fscache_proc_init(void) goto error_dir; #ifdef CONFIG_FSCACHE_STATS - if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL, - &fscache_stats_fops)) + if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL, + fscache_stats_show)) goto error_stats; #endif #ifdef CONFIG_FSCACHE_HISTOGRAM - if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL, - &fscache_histogram_fops)) + if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL, + &fscache_histogram_ops)) goto error_histogram; #endif diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index fcc8c2f2690e..00564a1dfd76 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -138,7 +138,7 @@ atomic_t fscache_n_cache_culled_objects; /* * display the general statistics */ -static int fscache_stats_show(struct seq_file *m, void *v) +int fscache_stats_show(struct seq_file *m, void *v) { seq_puts(m, "FS-Cache statistics\n"); @@ -284,18 +284,3 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cache_culled_objects)); return 0; } - -/* - * open "/proc/fs/fscache/stats" allowing provision of a statistical summary - */ -static int fscache_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, fscache_stats_show, NULL); -} - -const struct file_operations fscache_stats_fops = { - .open = fscache_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; diff --git a/fs/inode.c b/fs/inode.c index 13ceb98c3bd3..3b55391072f3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -178,6 +178,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; diff --git a/fs/internal.h b/fs/internal.h index e08972db0303..980d005b21b4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -125,6 +125,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); +extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file *filp_clone_open(struct file *); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 0a754f38462e..e5a6deb38e1e 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, __func__, inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index a70907606025..35a5b2a81ae0 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -29,7 +29,6 @@ #ifdef PROC_FS_JFS /* see jfs_debug.h */ -static struct proc_dir_entry *base; #ifdef CONFIG_JFS_DEBUG static int jfs_loglevel_proc_show(struct seq_file *m, void *v) { @@ -66,43 +65,29 @@ static const struct file_operations jfs_loglevel_proc_fops = { }; #endif -static struct { - const char *name; - const struct file_operations *proc_fops; -} Entries[] = { -#ifdef CONFIG_JFS_STATISTICS - { "lmstats", &jfs_lmstats_proc_fops, }, - { "txstats", &jfs_txstats_proc_fops, }, - { "xtstat", &jfs_xtstat_proc_fops, }, - { "mpstat", &jfs_mpstat_proc_fops, }, -#endif -#ifdef CONFIG_JFS_DEBUG - { "TxAnchor", &jfs_txanchor_proc_fops, }, - { "loglevel", &jfs_loglevel_proc_fops } -#endif -}; -#define NPROCENT ARRAY_SIZE(Entries) - void jfs_proc_init(void) { - int i; + struct proc_dir_entry *base; - if (!(base = proc_mkdir("fs/jfs", NULL))) + base = proc_mkdir("fs/jfs", NULL); + if (!base) return; - for (i = 0; i < NPROCENT; i++) - proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); +#ifdef CONFIG_JFS_STATISTICS + proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show); + proc_create_single("txstats", 0, base, jfs_txstats_proc_show); + proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show); + proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show); +#endif +#ifdef CONFIG_JFS_DEBUG + proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show); + proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops); +#endif } void jfs_proc_clean(void) { - int i; - - if (base) { - for (i = 0; i < NPROCENT; i++) - remove_proc_entry(Entries[i].name, base); - remove_proc_entry("fs/jfs", NULL); - } + remove_proc_subtree("fs/jfs", NULL); } #endif /* PROC_FS_JFS */ diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index eafd1300a00b..0d9e35da8462 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -62,7 +62,7 @@ extern void jfs_proc_clean(void); extern int jfsloglevel; -extern const struct file_operations jfs_txanchor_proc_fops; +int jfs_txanchor_proc_show(struct seq_file *m, void *v); /* information message: e.g., configuration, major event */ #define jfs_info(fmt, arg...) do { \ @@ -105,10 +105,10 @@ extern const struct file_operations jfs_txanchor_proc_fops; * ---------- */ #ifdef CONFIG_JFS_STATISTICS -extern const struct file_operations jfs_lmstats_proc_fops; -extern const struct file_operations jfs_txstats_proc_fops; -extern const struct file_operations jfs_mpstat_proc_fops; -extern const struct file_operations jfs_xtstat_proc_fops; +int jfs_lmstats_proc_show(struct seq_file *m, void *v); +int jfs_txstats_proc_show(struct seq_file *m, void *v); +int jfs_mpstat_proc_show(struct seq_file *m, void *v); +int jfs_xtstat_proc_show(struct seq_file *m, void *v); #define INCREMENT(x) ((x)++) #define DECREMENT(x) ((x)--) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 0e5d412c0b01..6b68df395892 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2493,7 +2493,7 @@ exit: } #ifdef CONFIG_JFS_STATISTICS -static int jfs_lmstats_proc_show(struct seq_file *m, void *v) +int jfs_lmstats_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Logmgr stats\n" @@ -2510,16 +2510,4 @@ static int jfs_lmstats_proc_show(struct seq_file *m, void *v) lmStat.partial_page); return 0; } - -static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_lmstats_proc_show, NULL); -} - -const struct file_operations jfs_lmstats_proc_fops = { - .open = jfs_lmstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_JFS_STATISTICS */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 1a3b0cc22ad3..fa2c6824c7f2 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -815,7 +815,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) } #ifdef CONFIG_JFS_STATISTICS -static int jfs_mpstat_proc_show(struct seq_file *m, void *v) +int jfs_mpstat_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Metapage statistics\n" @@ -828,16 +828,4 @@ static int jfs_mpstat_proc_show(struct seq_file *m, void *v) mpStat.lockwait); return 0; } - -static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_mpstat_proc_show, NULL); -} - -const struct file_operations jfs_mpstat_proc_fops = { - .open = jfs_mpstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 4d973524c887..a5663cb621d8 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2998,7 +2998,7 @@ int jfs_sync(void *arg) } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) -static int jfs_txanchor_proc_show(struct seq_file *m, void *v) +int jfs_txanchor_proc_show(struct seq_file *m, void *v) { char *freewait; char *freelockwait; @@ -3032,22 +3032,10 @@ static int jfs_txanchor_proc_show(struct seq_file *m, void *v) list_empty(&TxAnchor.unlock_queue) ? "" : "not "); return 0; } - -static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_txanchor_proc_show, NULL); -} - -const struct file_operations jfs_txanchor_proc_fops = { - .open = jfs_txanchor_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) -static int jfs_txstats_proc_show(struct seq_file *m, void *v) +int jfs_txstats_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS TxStats\n" @@ -3072,16 +3060,4 @@ static int jfs_txstats_proc_show(struct seq_file *m, void *v) TxStat.txLockAlloc_freelock); return 0; } - -static int jfs_txstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_txstats_proc_show, NULL); -} - -const struct file_operations jfs_txstats_proc_fops = { - .open = jfs_txstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 5cde6d2fcfca..2c200b5256a6 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -3874,7 +3874,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) } #ifdef CONFIG_JFS_STATISTICS -static int jfs_xtstat_proc_show(struct seq_file *m, void *v) +int jfs_xtstat_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Xtree statistics\n" @@ -3887,16 +3887,4 @@ static int jfs_xtstat_proc_show(struct seq_file *m, void *v) xtStat.split); return 0; } - -static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_xtstat_proc_show, NULL); -} - -const struct file_operations jfs_xtstat_proc_fops = { - .open = jfs_xtstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b41596d71858..56c3fcbfe80e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out1: diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 26dd9a50f383..ff2716f9322e 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, info->root = root; info->ns = ns; + INIT_LIST_HEAD(&info->node); sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, &init_user_ns, info); diff --git a/fs/locks.c b/fs/locks.c index 62bbe8b31f26..05e211be8684 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2788,22 +2788,10 @@ static const struct seq_operations locks_seq_operations = { .show = locks_show, }; -static int locks_open(struct inode *inode, struct file *filp) -{ - return seq_open_private(filp, &locks_seq_operations, - sizeof(struct locks_iterator)); -} - -static const struct file_operations proc_locks_operations = { - .open = locks_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static int __init proc_locks_init(void) { - proc_create("locks", 0, NULL, &proc_locks_operations); + proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, + sizeof(struct locks_iterator), NULL); return 0; } fs_initcall(proc_locks_init); diff --git a/fs/namei.c b/fs/namei.c index 186bd2464fd5..a59968de1636 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1438,10 +1438,8 @@ static int path_parent_directory(struct path *path) static int follow_dotdot(struct nameidata *nd) { while(1) { - if (nd->path.dentry == nd->root.dentry && - nd->path.mnt == nd->root.mnt) { + if (path_equal(&nd->path, &nd->root)) break; - } if (nd->path.dentry != nd->path.mnt->mnt_root) { int ret = path_parent_directory(&nd->path); if (ret) @@ -3367,7 +3365,9 @@ finish_open_created: goto out; *opened |= FILE_OPENED; opened: - error = ima_file_check(file, op->acc_mode, *opened); + error = open_check_o_direct(file); + if (!error) + error = ima_file_check(file, op->acc_mode, *opened); if (!error && will_truncate) error = handle_truncate(file); out: @@ -3447,6 +3447,9 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = finish_open(file, child, NULL, opened); if (error) goto out2; + error = open_check_o_direct(file); + if (error) + fput(file); out2: mnt_drop_write(path.mnt); out: @@ -3847,11 +3850,11 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out; - shrink_dcache_parent(dentry); error = dir->i_op->rmdir(dir, dentry); if (error) goto out; + shrink_dcache_parent(dentry); dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); @@ -4434,8 +4437,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir->i_nlink >= max_links) goto out; } - if (is_dir && !(flags & RENAME_EXCHANGE) && target) - shrink_dcache_parent(new_dentry); if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) @@ -4452,8 +4453,10 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; if (!(flags & RENAME_EXCHANGE) && target) { - if (is_dir) + if (is_dir) { + shrink_dcache_parent(new_dentry); target->i_flags |= S_DEAD; + } dont_mount(new_dentry); detach_mounts(new_dentry); } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b9129e2befea..bbc91d7ca1bd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1067,7 +1067,6 @@ void nfs_clients_init(struct net *net) } #ifdef CONFIG_PROC_FS -static int nfs_server_list_open(struct inode *inode, struct file *file); static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_server_list_stop(struct seq_file *p, void *v); @@ -1080,14 +1079,6 @@ static const struct seq_operations nfs_server_list_ops = { .show = nfs_server_list_show, }; -static const struct file_operations nfs_server_list_fops = { - .open = nfs_server_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static int nfs_volume_list_open(struct inode *inode, struct file *file); static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_volume_list_stop(struct seq_file *p, void *v); @@ -1100,23 +1091,6 @@ static const struct seq_operations nfs_volume_list_ops = { .show = nfs_volume_list_show, }; -static const struct file_operations nfs_volume_list_fops = { - .open = nfs_volume_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* - * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which - * we're dealing - */ -static int nfs_server_list_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nfs_server_list_ops, - sizeof(struct seq_net_private)); -} - /* * set up the iterator to start reading from the server list and return the first item */ @@ -1185,15 +1159,6 @@ static int nfs_server_list_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes - */ -static int nfs_volume_list_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nfs_volume_list_ops, - sizeof(struct seq_net_private)); -} - -/* * set up the iterator to start reading from the volume list and return the first item */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) @@ -1278,14 +1243,14 @@ int nfs_fs_proc_net_init(struct net *net) goto error_0; /* a file of servers with which we're dealing */ - p = proc_create("servers", S_IFREG|S_IRUGO, - nn->proc_nfsfs, &nfs_server_list_fops); + p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs, + &nfs_server_list_ops, sizeof(struct seq_net_private)); if (!p) goto error_1; /* a file of volumes that we have mounted */ - p = proc_create("volumes", S_IFREG|S_IRUGO, - nn->proc_nfsfs, &nfs_volume_list_fops); + p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs, + &nfs_volume_list_ops, sizeof(struct seq_net_private)); if (!p) goto error_1; return 0; diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 70b8bf781fce..a43dfedd69ec 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -227,7 +227,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, if (!buf) return -ENOMEM; - rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); + rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) { error = -ENOMEM; goto out_free_buf; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2410b093a2e6..b0555d7d8200 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, break; case S_IFDIR: host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); + if (!host_err && unlikely(d_unhashed(dchild))) { + struct dentry *d; + d = lookup_one_len(dchild->d_name.name, + dchild->d_parent, + dchild->d_name.len); + if (IS_ERR(d)) { + host_err = PTR_ERR(d); + break; + } + if (unlikely(d_is_negative(d))) { + dput(d); + err = nfserr_serverfault; + goto out; + } + dput(resfhp->fh_dentry); + resfhp->fh_dentry = dget(d); + err = fh_update(resfhp); + dput(dchild); + dchild = d; + if (err) + goto out; + } break; case S_IFCHR: case S_IFBLK: diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1a2894aa0194..dd52d3f82e8d 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) int err = nilfs_add_link(dentry, inode); if (!err) { - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; nilfs_mark_inode_dirty(inode); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 91a8889abf9b..ea8c551bcd7e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, current_page, vec_len, vec_start); len = bio_add_page(bio, page, vec_len, vec_start); - if (len != vec_len) { - mlog(ML_ERROR, "Adding page[%d] to bio failed, " - "page %p, len %d, vec_len %u, vec_start %u, " - "bi_sector %llu\n", current_page, page, len, - vec_len, vec_start, - (unsigned long long)bio->bi_iter.bi_sector); - bio_put(bio); - bio = ERR_PTR(-EIO); - return bio; - } + if (len != vec_len) break; cs += vec_len / (PAGE_SIZE/spp); vec_start = 0; diff --git a/fs/open.c b/fs/open.c index c5ee7cd60424..d0e955b558ad 100644 --- a/fs/open.c +++ b/fs/open.c @@ -724,6 +724,16 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) return ksys_fchown(fd, user, group); } +int open_check_o_direct(struct file *f) +{ + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) + return -EINVAL; + } + return 0; +} + static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *), @@ -745,7 +755,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH; f->f_op = &empty_fops; - goto done; + return 0; } if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { @@ -798,12 +808,7 @@ static int do_dentry_open(struct file *f, f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); -done: - /* NB: we're sure to have correct a_ops only after f_op->open */ - error = -EINVAL; - if ((f->f_flags & O_DIRECT) && - (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)) - goto out_fput; + return 0; cleanup_all: @@ -818,9 +823,6 @@ cleanup_file: f->f_path.dentry = NULL; f->f_inode = NULL; return error; -out_fput: - fput(f); - return error; } /** @@ -918,14 +920,20 @@ struct file *dentry_open(const struct path *path, int flags, BUG_ON(!path->mnt); f = get_empty_filp(); - if (IS_ERR(f)) - return f; - - f->f_flags = flags; - error = vfs_open(path, f, cred); - if (error) { - put_filp(f); - return ERR_PTR(error); + if (!IS_ERR(f)) { + f->f_flags = flags; + error = vfs_open(path, f, cred); + if (!error) { + /* from now on we need fput() to dispose of f */ + error = open_check_o_direct(f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } else { + put_filp(f); + f = ERR_PTR(error); + } } return f; } diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 6e3134e6d98a..1b5707c44c3f 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir, get_khandle_from_ino(inode), dentry); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; @@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir, "Assigned symlink inode new number of %pU\n", get_khandle_from_ino(inode)); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; @@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode "Assigned dir inode new number of %pU\n", get_khandle_from_ino(inode)); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; diff --git a/fs/proc/array.c b/fs/proc/array.c index ae2c807fd719..e6d7f41b6684 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -85,6 +85,7 @@ #include <linux/delayacct.h> #include <linux/seq_file.h> #include <linux/pid_namespace.h> +#include <linux/prctl.h> #include <linux/ptrace.h> #include <linux/tracehook.h> #include <linux/string_helpers.h> @@ -335,6 +336,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); #endif + seq_printf(m, "\nSpeculation_Store_Bypass:\t"); + switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { + case -EINVAL: + seq_printf(m, "unknown"); + break; + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; + case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: + seq_printf(m, "thread force mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_ENABLE: + seq_printf(m, "thread vulnerable"); + break; + case PR_SPEC_DISABLE: + seq_printf(m, "globally mitigated"); + break; + default: + seq_printf(m, "vulnerable"); + break; + } seq_putc(m, '\n'); } @@ -677,25 +702,22 @@ out: static int children_seq_show(struct seq_file *seq, void *v) { - struct inode *inode = seq->private; - pid_t pid; - - pid = pid_nr_ns(v, inode->i_sb->s_fs_info); - seq_printf(seq, "%d ", pid); + struct inode *inode = file_inode(seq->file); + seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode))); return 0; } static void *children_seq_start(struct seq_file *seq, loff_t *pos) { - return get_children_pid(seq->private, NULL, *pos); + return get_children_pid(file_inode(seq->file), NULL, *pos); } static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pid *pid; - pid = get_children_pid(seq->private, v, *pos + 1); + pid = get_children_pid(file_inode(seq->file), v, *pos + 1); put_pid(v); ++*pos; @@ -716,17 +738,7 @@ static const struct seq_operations children_seq_ops = { static int children_seq_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - - ret = seq_open(file, &children_seq_ops); - if (ret) - return ret; - - m = file->private_data; - m->private = inode; - - return ret; + return seq_open(file, &children_seq_ops); } const struct file_operations proc_tid_children_operations = { diff --git a/fs/proc/base.c b/fs/proc/base.c index 1a76d751cf3c..4e35593546b1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -698,7 +698,7 @@ static bool has_pid_permissions(struct pid_namespace *pid, static int proc_pid_permission(struct inode *inode, int mask) { - struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct pid_namespace *pid = proc_pid_ns(inode); struct task_struct *task; bool has_perms; @@ -733,13 +733,11 @@ static const struct inode_operations proc_def_inode_operations = { static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns; - struct pid *pid; + struct pid_namespace *ns = proc_pid_ns(inode); + struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; - ns = inode->i_sb->s_fs_info; - pid = proc_pid(inode); task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; @@ -1410,7 +1408,7 @@ static const struct file_operations proc_fail_nth_operations = { static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); struct task_struct *p; p = get_proc_task(inode); @@ -1782,8 +1780,8 @@ int pid_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct pid_namespace *pid = proc_pid_ns(inode); struct task_struct *task; - struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -2337,7 +2335,7 @@ static int proc_timers_open(struct inode *inode, struct file *file) return -ENOMEM; tp->pid = proc_pid(inode); - tp->ns = inode->i_sb->s_fs_info; + tp->ns = proc_pid_ns(inode); return 0; } @@ -3239,7 +3237,7 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(file_inode(file)); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) @@ -3588,7 +3586,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = inode->i_sb->s_fs_info; + ns = proc_pid_ns(inode); tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 8233e7af9389..fa762c5fbcb2 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v) return 0; } -static int cmdline_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cmdline_proc_show, NULL); -} - -static const struct file_operations cmdline_proc_fops = { - .open = cmdline_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_cmdline_init(void) { - proc_create("cmdline", 0, NULL, &cmdline_proc_fops); + proc_create_single("cmdline", 0, NULL, cmdline_proc_show); return 0; } fs_initcall(proc_cmdline_init); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index a8ac48aebd59..954caf0b7fee 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = { .show = show_console_dev }; -static int consoles_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &consoles_op); -} - -static const struct file_operations proc_consoles_operations = { - .open = consoles_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_consoles_init(void) { - proc_create("consoles", 0, NULL, &proc_consoles_operations); + proc_create_seq("consoles", 0, NULL, &consoles_op); return 0; } fs_initcall(proc_consoles_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c index 2c7f22b14489..37d38697eaf8 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = { .show = devinfo_show }; -static int devinfo_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &devinfo_ops); -} - -static const struct file_operations proc_devinfo_operations = { - .open = devinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_devices_init(void) { - proc_create("devices", 0, NULL, &proc_devinfo_operations); + proc_create_seq("devices", 0, NULL, &devinfo_ops); return 0; } fs_initcall(proc_devices_init); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2078e70e1595..02bb1914f5f7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/uaccess.h> +#include <linux/seq_file.h> #include "internal.h" @@ -346,13 +347,12 @@ static const struct inode_operations proc_dir_inode_operations = { .setattr = proc_notify_change, }; -static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +/* returns the registered entry, or frees dp and returns NULL on failure */ +struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + struct proc_dir_entry *dp) { - int ret; - - ret = proc_alloc_inum(&dp->low_ino); - if (ret) - return ret; + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; write_lock(&proc_subdir_lock); dp->parent = dir; @@ -360,12 +360,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); write_unlock(&proc_subdir_lock); - proc_free_inum(dp->low_ino); - return -EEXIST; + goto out_free_inum; } write_unlock(&proc_subdir_lock); - return 0; + return dp; +out_free_inum: + proc_free_inum(dp->low_ino); +out_free_entry: + pde_free(dp); + return NULL; } static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, @@ -443,10 +447,7 @@ struct proc_dir_entry *proc_symlink(const char *name, if (ent->data) { strcpy((char*)ent->data,dest); ent->proc_iops = &proc_link_inode_operations; - if (proc_register(parent, ent) < 0) { - pde_free(ent); - ent = NULL; - } + ent = proc_register(parent, ent); } else { pde_free(ent); ent = NULL; @@ -470,11 +471,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; parent->nlink++; - if (proc_register(parent, ent) < 0) { - pde_free(ent); + ent = proc_register(parent, ent); + if (!ent) parent->nlink--; - ent = NULL; - } } return ent; } @@ -505,47 +504,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->proc_fops = NULL; ent->proc_iops = NULL; parent->nlink++; - if (proc_register(parent, ent) < 0) { - pde_free(ent); + ent = proc_register(parent, ent); + if (!ent) parent->nlink--; - ent = NULL; - } } return ent; } EXPORT_SYMBOL(proc_create_mount_point); -struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops, - void *data) +struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, + struct proc_dir_entry **parent, void *data) { - struct proc_dir_entry *pde; + struct proc_dir_entry *p; + if ((mode & S_IFMT) == 0) mode |= S_IFREG; - - if (!S_ISREG(mode)) { - WARN_ON(1); /* use proc_mkdir() */ + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; + + p = __proc_create(parent, name, mode, 1); + if (p) { + p->proc_iops = &proc_file_inode_operations; + p->data = data; } + return p; +} + +struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, void *data) +{ + struct proc_dir_entry *p; BUG_ON(proc_fops == NULL); - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO; - pde = __proc_create(&parent, name, mode, 1); - if (!pde) - goto out; - pde->proc_fops = proc_fops; - pde->data = data; - pde->proc_iops = &proc_file_inode_operations; - if (proc_register(parent, pde) < 0) - goto out_free; - return pde; -out_free: - pde_free(pde); -out: - return NULL; + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = proc_fops; + return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); @@ -557,6 +556,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode, } EXPORT_SYMBOL(proc_create); +static int proc_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + if (de->state_size) + return seq_open_private(file, de->seq_ops, de->state_size); + return seq_open(file, de->seq_ops); +} + +static const struct file_operations proc_seq_fops = { + .open = proc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + return proc_register(parent, p); +} +EXPORT_SYMBOL(proc_create_seq_private); + +static int proc_single_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + return single_open(file, de->single_show, de->data); +} + +static const struct file_operations proc_single_fops = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_single_fops; + p->single_show = show; + return proc_register(parent, p); +} +EXPORT_SYMBOL(proc_create_single_data); + void proc_set_size(struct proc_dir_entry *de, loff_t size) { de->size = size; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 0f1692e63cb6..a318ae5b36b4 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,7 +44,12 @@ struct proc_dir_entry { struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; + union { + const struct seq_operations *seq_ops; + int (*single_show)(struct seq_file *, void *); + }; void *data; + unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; @@ -57,9 +62,9 @@ struct proc_dir_entry { umode_t mode; u8 namelen; #ifdef CONFIG_64BIT -#define SIZEOF_PDE_INLINE_NAME (192-139) +#define SIZEOF_PDE_INLINE_NAME (192-155) #else -#define SIZEOF_PDE_INLINE_NAME (128-87) +#define SIZEOF_PDE_INLINE_NAME (128-95) #endif char inline_name[SIZEOF_PDE_INLINE_NAME]; } __randomize_layout; @@ -162,6 +167,10 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i /* * generic.c */ +struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, + struct proc_dir_entry **parent, void *data); +struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c index 6a6bee9c603c..cb0edc7cbf09 100644 --- a/fs/proc/interrupts.c +++ b/fs/proc/interrupts.c @@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = { .show = show_interrupts }; -static int interrupts_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &int_seq_ops); -} - -static const struct file_operations proc_interrupts_operations = { - .open = interrupts_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_interrupts_init(void) { - proc_create("interrupts", 0, NULL, &proc_interrupts_operations); + proc_create_seq("interrupts", 0, NULL, &int_seq_ops); return 0; } fs_initcall(proc_interrupts_init); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index b572cc865b92..d06694757201 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -28,21 +28,9 @@ static int loadavg_proc_show(struct seq_file *m, void *v) return 0; } -static int loadavg_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, loadavg_proc_show, NULL); -} - -static const struct file_operations loadavg_proc_fops = { - .open = loadavg_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_loadavg_init(void) { - proc_create("loadavg", 0, NULL, &loadavg_proc_fops); + proc_create_single("loadavg", 0, NULL, loadavg_proc_show); return 0; } fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 65a72ab57471..2fb04846ed11 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) return 0; } -static int meminfo_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, meminfo_proc_show, NULL); -} - -static const struct file_operations meminfo_proc_fops = { - .open = meminfo_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_meminfo_init(void) { - proc_create("meminfo", 0, NULL, &meminfo_proc_fops); + proc_create_single("meminfo", 0, NULL, meminfo_proc_show); return 0; } fs_initcall(proc_meminfo_init); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 75634379f82e..3b63be64e436 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = { .show = nommu_region_list_show }; -static int proc_nommu_region_list_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_nommu_region_list_seqop); -} - -static const struct file_operations proc_nommu_region_list_operations = { - .open = proc_nommu_region_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_nommu_init(void) { - proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations); + proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop); return 0; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 1763f370489d..7d94fa005b0d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } -int seq_open_net(struct inode *ino, struct file *f, - const struct seq_operations *ops, int size) +static int seq_open_net(struct inode *inode, struct file *file) { - struct net *net; + unsigned int state_size = PDE(inode)->state_size; struct seq_net_private *p; + struct net *net; - BUG_ON(size < sizeof(*p)); + WARN_ON_ONCE(state_size < sizeof(*p)); - net = get_proc_net(ino); - if (net == NULL) + net = get_proc_net(inode); + if (!net) return -ENXIO; - p = __seq_open_private(f, ops, size); - if (p == NULL) { + p = __seq_open_private(file, PDE(inode)->seq_ops, state_size); + if (!p) { put_net(net); return -ENOMEM; } @@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f, #endif return 0; } -EXPORT_SYMBOL_GPL(seq_open_net); -int single_open_net(struct inode *inode, struct file *file, - int (*show)(struct seq_file *, void *)) +static int seq_release_net(struct inode *ino, struct file *f) { - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, show, net); - if (err < 0) - goto err_open; + struct seq_file *seq = f->private_data; + put_net(seq_file_net(seq)); + seq_release_private(ino, f); return 0; +} -err_open: - put_net(net); -err_net: - return err; +static const struct file_operations proc_net_seq_fops = { + .open = seq_open_net, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + return proc_register(parent, p); } -EXPORT_SYMBOL_GPL(single_open_net); +EXPORT_SYMBOL_GPL(proc_create_net_data); -int seq_release_net(struct inode *ino, struct file *f) +static int single_open_net(struct inode *inode, struct file *file) { - struct seq_file *seq; + struct proc_dir_entry *de = PDE(inode); + struct net *net; + int err; - seq = f->private_data; + net = get_proc_net(inode); + if (!net) + return -ENXIO; - put_net(seq_file_net(seq)); - seq_release_private(ino, f); - return 0; + err = single_open(file, de->single_show, net); + if (err) + put_net(net); + return err; } -EXPORT_SYMBOL_GPL(seq_release_net); -int single_release_net(struct inode *ino, struct file *f) +static int single_release_net(struct inode *ino, struct file *f) { struct seq_file *seq = f->private_data; put_net(seq->private); return single_release(ino, f); } -EXPORT_SYMBOL_GPL(single_release_net); + +static const struct file_operations proc_net_single_fops = { + .open = single_open_net, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_single_fops; + p->single_show = show; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_single); static struct net *get_proc_task_net(struct inode *dir) { diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index d0cf1c50bb6c..c69ff191e5d8 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = { .show = show_tty_driver }; -static int tty_drivers_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tty_drivers_op); -} - -static const struct file_operations proc_tty_drivers_operations = { - .open = tty_drivers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> @@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver) struct proc_dir_entry *ent; if (!driver->driver_name || driver->proc_entry || - !driver->ops->proc_fops) + !driver->ops->proc_show) return; - ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, - driver->ops->proc_fops, driver); + ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver, + driver->ops->proc_show, driver); driver->proc_entry = ent; } @@ -186,6 +174,6 @@ void __init proc_tty_init(void) * entry. */ proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL); - proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops); - proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); + proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops); + proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op); } diff --git a/fs/proc/self.c b/fs/proc/self.c index 4d7d061696b3..127265e5c55f 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); pid_t tgid = task_tgid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = s->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(root_inode); struct dentry *self; inode_lock(root_inode); diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 24072cc06e65..12901dcf57e2 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v) return 0; } -static int softirqs_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_softirqs, NULL); -} - -static const struct file_operations proc_softirqs_operations = { - .open = softirqs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_softirqs_init(void) { - proc_create("softirqs", 0, NULL, &proc_softirqs_operations); + proc_create_single("softirqs", 0, NULL, show_softirqs); return 0; } fs_initcall(proc_softirqs_init); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 9d2efaca499f..b905010ca9eb 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = s->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(root_inode); struct dentry *thread_self; inode_lock(root_inode); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 95a708d83721..3bd12f955867 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v) return 0; } -static int uptime_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, uptime_proc_show, NULL); -} - -static const struct file_operations uptime_proc_fops = { - .open = uptime_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_uptime_init(void) { - proc_create("uptime", 0, NULL, &uptime_proc_fops); + proc_create_single("uptime", 0, NULL, uptime_proc_show); return 0; } fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index 94901e8e700d..b449f186577f 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v) return 0; } -static int version_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, version_proc_show, NULL); -} - -static const struct file_operations version_proc_fops = { - .open = version_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_version_init(void) { - proc_create("version", 0, NULL, &version_proc_fops); + proc_create_single("version", 0, NULL, version_proc_show); return 0; } fs_initcall(proc_version_init); diff --git a/fs/read_write.c b/fs/read_write.c index c4eabbfc90df..e83bd9744b5d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -2023,7 +2023,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) ret = mnt_want_write_file(dst_file); if (ret) { info->status = ret; - goto next_loop; + goto next_fdput; } dst_off = info->dest_offset; @@ -2058,9 +2058,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) next_file: mnt_drop_write_file(dst_file); -next_loop: +next_fdput: fdput(dst_fd); - +next_loop: if (fatal_signal_pending(current)) goto out; } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index bd39a998843d..5089dac02660 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: @@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode goto out_failed; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: @@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* the above add_entry did not update dir's stat data */ reiserfs_update_sd(&th, dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(dir->i_sb); @@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir, goto out_failed; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index fe999157dd97..e39b3910d24d 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -389,27 +389,13 @@ static int show_journal(struct seq_file *m, void *unused) return 0; } -static int r_open(struct inode *inode, struct file *file) -{ - return single_open(file, PDE_DATA(inode), - proc_get_parent_data(inode)); -} - -static const struct file_operations r_file_operations = { - .open = r_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static struct proc_dir_entry *proc_info_root = NULL; static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, int (*func) (struct seq_file *, void *)) { - proc_create_data(name, 0, REISERFS_SB(sb)->procdir, - &r_file_operations, func); + proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb); } int reiserfs_proc_info_init(struct super_block *sb) diff --git a/fs/seq_file.c b/fs/seq_file.c index c6c27f1f9c98..4cc090b50cc5 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, if (m->count + width >= m->size) goto overflow; - if (num < 10) { - m->buf[m->count++] = num + '0'; - return; - } - len = num_to_str(m->buf + m->count, m->size - m->count, num, width); if (!len) goto overflow; diff --git a/fs/super.c b/fs/super.c index 122c402049a2..50728d9c1a05 100644 --- a/fs/super.c +++ b/fs/super.c @@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); /* - * Don't call trylock_super as it is a potential - * scalability bottleneck. The counts could get updated - * between super_cache_count and super_cache_scan anyway. - * Call to super_cache_count with shrinker_rwsem held - * ensures the safety of call to list_lru_shrink_count() and - * s_op->nr_cached_objects(). + * We don't call trylock_super() here as it is a scalability bottleneck, + * so we're exposed to partial setup state. The shrinker rwsem does not + * protect filesystem operations backing list_lru_shrink_count() or + * s_op->nr_cached_objects(). Counts can change between + * super_cache_count and super_cache_scan, so we really don't need locks + * here. + * + * However, if we are currently mounting the superblock, the underlying + * filesystem might be in a state of partial construction and hence it + * is dangerous to access it. trylock_super() uses a SB_BORN check to + * avoid this situation, so do the same here. The memory barrier is + * matched with the one in mount_fs() as we don't hold locks here. */ + if (!(sb->s_flags & SB_BORN)) + return 0; + smp_rmb(); + if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc); @@ -937,7 +947,7 @@ void emergency_remount(void) static void do_thaw_all_callback(struct super_block *sb) { down_write(&sb->s_umount); - if (sb->s_root && sb->s_flags & MS_BORN) { + if (sb->s_root && sb->s_flags & SB_BORN) { emergency_thaw_bdev(sb); thaw_super_locked(sb); } else { @@ -1272,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) sb = root->d_sb; BUG_ON(!sb); WARN_ON(!sb->s_bdi); + + /* + * Write barrier is for super_cache_count(). We place it before setting + * SB_BORN as the data dependency between the two functions is the + * superblock structure contents that we just set up, not the SB_BORN + * flag. + */ + smp_wmb(); sb->s_flags |= SB_BORN; error = security_sb_kern_mount(sb, flags, secdata); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index b428d317ae92..92682fcc41f6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, { struct dentry *root; void *ns; - bool new_sb; + bool new_sb = false; if (!(flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) @@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); root = kernfs_mount_ns(fs_type, flags, sysfs_root, SYSFS_MAGIC, &new_sb, ns); - if (IS_ERR(root) || !new_sb) + if (!new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); - else if (new_sb) + else if (!IS_ERR(root)) root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; return root; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 0458dd47e105..c586026508db 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } @@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inc_nlink(dir); dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 32545cd00ceb..d5f43ba76c59 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ufs_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; out_fail: diff --git a/fs/xattr.c b/fs/xattr.c index 61cd28ba25f3..f9cb1db187b7 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -229,7 +229,7 @@ out: } EXPORT_SYMBOL_GPL(vfs_setxattr); -ssize_t +static ssize_t xattr_getsecurity(struct inode *inode, const char *name, void *value, size_t size) { @@ -254,7 +254,6 @@ out: out_noalloc: return len; } -EXPORT_SYMBOL_GPL(xattr_getsecurity); /* * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr @@ -354,7 +353,6 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size) if (error) return error; if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) { - error = -EOPNOTSUPP; error = inode->i_op->listxattr(dentry, list, size); } else { error = security_inode_listsecurity(inode, list, size); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0ab824f574ed..102463543db3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -594,7 +594,7 @@ xfs_alloc_ioend( struct xfs_ioend *ioend; struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); xfs_init_bio_from_bh(bio, bh); ioend = container_of(bio, struct xfs_ioend, io_inline_bio); diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 69346d460dfa..694c85b03813 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -18,7 +18,7 @@ #ifndef __XFS_AOPS_H__ #define __XFS_AOPS_H__ -extern struct bio_set *xfs_ioend_bioset; +extern struct bio_set xfs_ioend_bioset; /* * Types of I/O for bmap clustering and I/O completion tracking. diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 056e12b421eb..1cc79907b377 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -113,6 +113,7 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats) } } +#ifdef CONFIG_PROC_FS /* legacy quota interfaces */ #ifdef CONFIG_XFS_QUOTA static int xqm_proc_show(struct seq_file *m, void *v) @@ -124,18 +125,6 @@ static int xqm_proc_show(struct seq_file *m, void *v) return 0; } -static int xqm_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqm_proc_show, NULL); -} - -static const struct file_operations xqm_proc_fops = { - .open = xqm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* legacy quota stats interface no 2 */ static int xqmstat_proc_show(struct seq_file *m, void *v) { @@ -147,22 +136,8 @@ static int xqmstat_proc_show(struct seq_file *m, void *v) seq_putc(m, '\n'); return 0; } - -static int xqmstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqmstat_proc_show, NULL); -} - -static const struct file_operations xqmstat_proc_fops = { - .owner = THIS_MODULE, - .open = xqmstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_XFS_QUOTA */ -#ifdef CONFIG_PROC_FS int xfs_init_procfs(void) { @@ -174,11 +149,9 @@ xfs_init_procfs(void) goto out; #ifdef CONFIG_XFS_QUOTA - if (!proc_create("fs/xfs/xqmstat", 0, NULL, - &xqmstat_proc_fops)) + if (!proc_create_single("fs/xfs/xqmstat", 0, NULL, xqmstat_proc_show)) goto out; - if (!proc_create("fs/xfs/xqm", 0, NULL, - &xqm_proc_fops)) + if (!proc_create_single("fs/xfs/xqm", 0, NULL, xqm_proc_show)) goto out; #endif return 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d71424052917..f643d76db516 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -63,7 +63,7 @@ #include <linux/parser.h> static const struct super_operations xfs_super_operations; -struct bio_set *xfs_ioend_bioset; +struct bio_set xfs_ioend_bioset; static struct kset *xfs_kset; /* top-level xfs sysfs dir */ #ifdef DEBUG @@ -1845,10 +1845,9 @@ MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) { - xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, + if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE, offsetof(struct xfs_ioend, io_inline_bio), - BIOSET_NEED_BVECS); - if (!xfs_ioend_bioset) + BIOSET_NEED_BVECS)) goto out; xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), @@ -1997,7 +1996,7 @@ xfs_init_zones(void) out_destroy_log_ticket_zone: kmem_zone_destroy(xfs_log_ticket_zone); out_free_ioend_bioset: - bioset_free(xfs_ioend_bioset); + bioset_exit(&xfs_ioend_bioset); out: return -ENOMEM; } @@ -2029,7 +2028,7 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_btree_cur_zone); kmem_zone_destroy(xfs_bmap_free_item_zone); kmem_zone_destroy(xfs_log_ticket_zone); - bioset_free(xfs_ioend_bioset); + bioset_exit(&xfs_ioend_bioset); } STATIC int __init diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 880a292d792f..ad2868263867 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -4,7 +4,16 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { + /* + * Use the non-coherent ops if available. If an architecture wants a + * more fine-grained selection of operations it will have to implement + * get_arch_dma_ops itself or use the per-device dma_ops. + */ +#ifdef CONFIG_DMA_NONCOHERENT_OPS + return &dma_noncoherent_ops; +#else return &dma_direct_ops; +#endif } #endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h index 830d7659289b..6bb3cd3d695a 100644 --- a/include/asm-generic/pci.h +++ b/include/asm-generic/pci.h @@ -14,12 +14,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */ -/* - * By default, assume that no iommu is in use and that the PCI - * space is mapped to address physical 0. - */ -#ifndef PCI_DMA_BUS_IS_PHYS -#define PCI_DMA_BUS_IS_PHYS (1) -#endif - #endif /* _ASM_GENERIC_PCI_H */ diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h index dd2a8cf7d20b..ccb5aa8468e0 100644 --- a/include/drm/bridge/dw_hdmi.h +++ b/include/drm/bridge/dw_hdmi.h @@ -151,7 +151,7 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev, struct drm_encoder *encoder, const struct dw_hdmi_plat_data *plat_data); -void dw_hdmi_setup_rx_sense(struct device *dev, bool hpd, bool rx_sense); +void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense); void dw_hdmi_set_sample_rate(struct dw_hdmi *hdmi, unsigned int rate); void dw_hdmi_audio_enable(struct dw_hdmi *hdmi); diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 40373920ea58..23f805562f4e 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -145,7 +145,12 @@ extern rwlock_t atalk_interfaces_lock; extern struct atalk_route atrtr_default; -extern const struct file_operations atalk_seq_arp_fops; +struct aarp_iter_state { + int bucket; + struct aarp_entry **table; +}; + +extern const struct seq_operations aarp_seq_ops; extern int sysctl_aarp_expiry_time; extern int sysctl_aarp_tick_time; diff --git a/include/linux/bio.h b/include/linux/bio.h index ce547a25e8ae..810a8bee8f85 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -67,8 +67,12 @@ #define bio_multiple_segments(bio) \ ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len) -#define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) -#define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) + +#define bvec_iter_sectors(iter) ((iter).bi_size >> 9) +#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) + +#define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) +#define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) /* * Return the data direction, READ or WRITE. @@ -406,13 +410,13 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, return bio_split(bio, sectors, gfp, bs); } -extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags); enum { BIOSET_NEED_BVECS = BIT(0), BIOSET_NEED_RESCUER = BIT(1), }; -extern void bioset_free(struct bio_set *); -extern mempool_t *biovec_create_pool(int pool_entries); +extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); +extern void bioset_exit(struct bio_set *); +extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *); extern void bio_put(struct bio *); @@ -421,11 +425,11 @@ extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); -extern struct bio_set *fs_bio_set; +extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); } static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) @@ -499,7 +503,10 @@ static inline void bio_flush_dcache_pages(struct bio *bi) } #endif +extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); +extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, @@ -507,7 +514,13 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct iov_iter *, gfp_t); extern int bio_uncopy_user(struct bio *); -void zero_fill_bio(struct bio *bio); +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); + +static inline void zero_fill_bio(struct bio *bio) +{ + zero_fill_bio_iter(bio, bio->bi_iter); +} + extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); @@ -722,11 +735,11 @@ struct bio_set { struct kmem_cache *bio_slab; unsigned int front_pad; - mempool_t *bio_pool; - mempool_t *bvec_pool; + mempool_t bio_pool; + mempool_t bvec_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t *bio_integrity_pool; - mempool_t *bvec_integrity_pool; + mempool_t bio_integrity_pool; + mempool_t bvec_integrity_pool; #endif /* @@ -745,6 +758,11 @@ struct biovec_slab { struct kmem_cache *slab; }; +static inline bool bioset_initialized(struct bio_set *bs) +{ + return bs->bio_slab != NULL; +} + /* * a small number of entries is fine, not going to be performance critical. * basically we just need to survive diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ebc34a5686dc..fb355173f3c7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -259,7 +259,8 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); - +bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, + struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 17b18b91ebac..3c4f390aea4b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/bvec.h> +#include <linux/ktime.h> struct bio_set; struct bio; @@ -90,10 +91,52 @@ static inline bool blk_path_error(blk_status_t error) return true; } -struct blk_issue_stat { - u64 stat; +/* + * From most significant bit: + * 1 bit: reserved for other usage, see below + * 12 bits: original size of bio + * 51 bits: issue time of bio + */ +#define BIO_ISSUE_RES_BITS 1 +#define BIO_ISSUE_SIZE_BITS 12 +#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) +#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) +#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) +#define BIO_ISSUE_SIZE_MASK \ + (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) +#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) + +/* Reserved bit for blk-throtl */ +#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) + +struct bio_issue { + u64 value; }; +static inline u64 __bio_issue_time(u64 time) +{ + return time & BIO_ISSUE_TIME_MASK; +} + +static inline u64 bio_issue_time(struct bio_issue *issue) +{ + return __bio_issue_time(issue->value); +} + +static inline sector_t bio_issue_size(struct bio_issue *issue) +{ + return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); +} + +static inline void bio_issue_init(struct bio_issue *issue, + sector_t size) +{ + size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; + issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | + (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | + ((u64)size << BIO_ISSUE_SIZE_SHIFT)); +} + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -138,7 +181,7 @@ struct bio { struct cgroup_subsys_state *bi_css; #ifdef CONFIG_BLK_DEV_THROTTLING_LOW void *bi_cg_private; - struct blk_issue_stat bi_issue_stat; + struct bio_issue bi_issue; #endif #endif union { @@ -186,6 +229,8 @@ struct bio { * throttling rules. Don't do it again. */ #define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion * of this bio. */ +#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ + /* See BVEC_POOL_OFFSET below before adding new flags */ /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c4eee043191..bca3a92eb55f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -125,16 +125,23 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) /* The per-zone write lock is held for this request */ #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) -/* timeout is expired */ -#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20)) /* already slept for hybrid poll */ -#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21)) +#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) /* + * Request state for blk-mq. + */ +enum mq_rq_state { + MQ_RQ_IDLE = 0, + MQ_RQ_IN_FLIGHT = 1, + MQ_RQ_COMPLETE = 2, +}; + +/* * Try to put the fields that are referenced together in the same cacheline. * * If you modify this structure, make sure to update blk_rq_init() and @@ -205,9 +212,20 @@ struct request { struct gendisk *rq_disk; struct hd_struct *part; - unsigned long start_time; - struct blk_issue_stat issue_stat; - /* Number of scatter-gather DMA addr+len pairs after + /* Time that I/O was submitted to the kernel. */ + u64 start_time_ns; + /* Time that I/O was submitted to the device. */ + u64 io_start_time_ns; + +#ifdef CONFIG_BLK_WBT + unsigned short wbt_flags; +#endif +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + unsigned short throtl_size; +#endif + + /* + * Number of scatter-gather DMA addr+len pairs after * physical address coalescing is performed. */ unsigned short nr_phys_segments; @@ -219,32 +237,14 @@ struct request { unsigned short write_hint; unsigned short ioprio; - unsigned int timeout; - void *special; /* opaque pointer available for LLD use */ unsigned int extra_len; /* length of alignment and padding */ - /* - * On blk-mq, the lower bits of ->gstate (generation number and - * state) carry the MQ_RQ_* state value and the upper bits the - * generation number which is monotonically incremented and used to - * distinguish the reuse instances. - * - * ->gstate_seq allows updates to ->gstate and other fields - * (currently ->deadline) during request start to be read - * atomically from the timeout path, so that it can operate on a - * coherent set of information. - */ - seqcount_t gstate_seq; - u64 gstate; + enum mq_rq_state state; + refcount_t ref; - /* - * ->aborted_gstate is used by the timeout to claim a specific - * recycle instance of this request. See blk_mq_timeout_work(). - */ - struct u64_stats_sync aborted_gstate_sync; - u64 aborted_gstate; + unsigned int timeout; /* access through blk_rq_set_deadline, blk_rq_deadline */ unsigned long __deadline; @@ -267,8 +267,6 @@ struct request { #ifdef CONFIG_BLK_CGROUP struct request_list *rl; /* rl this rq is alloced from */ - unsigned long long start_time_ns; - unsigned long long io_start_time_ns; /* when passed to hardware */ #endif }; @@ -328,9 +326,8 @@ typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { - BLK_EH_NOT_HANDLED, - BLK_EH_HANDLED, - BLK_EH_RESET_TIMER, + BLK_EH_DONE, /* drivers has completed the command */ + BLK_EH_RESET_TIMER, /* reset timer and try again */ }; typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); @@ -655,7 +652,7 @@ struct request_queue { struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; - struct bio_set *bio_split; + struct bio_set bio_split; #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; @@ -967,11 +964,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); -extern struct request *blk_get_request_flags(struct request_queue *, - unsigned int op, - blk_mq_req_flags_t flags); extern struct request *blk_get_request(struct request_queue *, unsigned int op, - gfp_t gfp_mask); + blk_mq_req_flags_t flags); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, @@ -1788,48 +1782,6 @@ int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -#ifdef CONFIG_BLK_CGROUP -/* - * This should not be using sched_clock(). A real patch is in progress - * to fix this up, until that is in place we need to disable preemption - * around sched_clock() in this function and set_io_start_time_ns(). - */ -static inline void set_start_time_ns(struct request *req) -{ - preempt_disable(); - req->start_time_ns = sched_clock(); - preempt_enable(); -} - -static inline void set_io_start_time_ns(struct request *req) -{ - preempt_disable(); - req->io_start_time_ns = sched_clock(); - preempt_enable(); -} - -static inline uint64_t rq_start_time_ns(struct request *req) -{ - return req->start_time_ns; -} - -static inline uint64_t rq_io_start_time_ns(struct request *req) -{ - return req->io_start_time_ns; -} -#else -static inline void set_start_time_ns(struct request *req) {} -static inline void set_io_start_time_ns(struct request *req) {} -static inline uint64_t rq_start_time_ns(struct request *req) -{ - return 0; -} -static inline uint64_t rq_io_start_time_ns(struct request *req) -{ - return 0; -} -#endif - #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e61c395fddf..df36b1b08af0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -142,10 +142,11 @@ struct bpf_verifier_state_list { struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ - struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 28a7ccc55c89..6aeaf6472665 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -72,8 +72,7 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size, - void (*release)(struct device *)); + bsg_job_fn *job_fn, int dd_job_size); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0c7dd9ceb139..dac37b6e00ec 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -17,17 +17,13 @@ struct bsg_ops { struct bsg_class_device { struct device *class_dev; - struct device *parent; int minor; struct request_queue *queue; - struct kref ref; const struct bsg_ops *ops; - void (*release)(struct device *); }; int bsg_register_queue(struct request_queue *q, struct device *parent, - const char *name, const struct bsg_ops *ops, - void (*release)(struct device *)); + const char *name, const struct bsg_ops *ops); int bsg_scsi_register_queue(struct request_queue *q, struct device *parent); void bsg_unregister_queue(struct request_queue *q); #else diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7b01bc11c692..a97a63eef59f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -53,6 +53,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 94acbde17bb1..66c6e17e61e5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -224,6 +224,7 @@ extern seqlock_t rename_lock; * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); +extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); diff --git a/include/linux/device.h b/include/linux/device.h index 477956990f5e..00b6c3b42437 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -88,6 +88,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @resume: Called to bring a device on this bus out of sleep mode. * @num_vf: Called to find out how many virtual functions a device on this * bus supports. + * @dma_configure: Called to setup DMA configuration on a device on + this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU @@ -96,8 +98,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @p: The private data of the driver core, only the driver core can * touch this. * @lock_key: Lock class key for use by the lock validator - * @force_dma: Assume devices on this bus should be set up by dma_configure() - * even if DMA capability is not explicitly described by firmware. * * A bus is a channel between the processor and one or more devices. For the * purposes of the device model, all devices are connected via a bus, even if @@ -130,14 +130,14 @@ struct bus_type { int (*num_vf)(struct device *dev); + int (*dma_configure)(struct device *dev); + const struct dev_pm_ops *pm; const struct iommu_ops *iommu_ops; struct subsys_private *p; struct lock_class_key lock_key; - - bool force_dma; }; extern int __must_check bus_register(struct bus_type *bus); @@ -904,6 +904,8 @@ struct dev_links_info { * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. + * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself + * indicates support for a higher limit in the dma_mask field. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -992,6 +994,7 @@ struct device { bool offline_disabled:1; bool offline:1; bool of_node_reused:1; + bool dma_32bit_limit:1; }; static inline struct device *kobj_to_dev(struct kobject *kobj) diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index c7d844f09c3a..a785f2507159 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -30,8 +30,6 @@ struct bus_type; extern void dma_debug_add_bus(struct bus_type *bus); -extern void dma_debug_init(u32 num_entries); - extern int dma_debug_resize_entries(u32 num_entries); extern void debug_dma_map_page(struct device *dev, struct page *page, @@ -100,10 +98,6 @@ static inline void dma_debug_add_bus(struct bus_type *bus) { } -static inline void dma_debug_init(u32 num_entries) -{ -} - static inline int dma_debug_resize_entries(u32 num_entries) { return 0; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 53ad6a47f513..8d9f33febde5 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -59,6 +59,11 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); - +int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f8ab1c0f589e..f9cc309507d9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -133,10 +133,10 @@ struct dma_map_ops { #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK u64 (*get_required_mask)(struct device *dev); #endif - int is_phys; }; extern const struct dma_map_ops dma_direct_ops; +extern const struct dma_map_ops dma_noncoherent_ops; extern const struct dma_map_ops dma_virt_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) @@ -502,7 +502,7 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #ifndef arch_dma_alloc_attrs -#define arch_dma_alloc_attrs(dev, flag) (true) +#define arch_dma_alloc_attrs(dev) (true) #endif static inline void *dma_alloc_attrs(struct device *dev, size_t size, @@ -521,7 +521,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (!arch_dma_alloc_attrs(&dev, &flag)) + if (!arch_dma_alloc_attrs(&dev)) return NULL; if (!ops->alloc) return NULL; @@ -572,14 +572,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -/* - * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please - * don't use this in new code. - */ -#ifndef arch_dma_supported -#define arch_dma_supported(dev, mask) (1) -#endif - static inline void dma_check_mask(struct device *dev, u64 mask) { if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1))) @@ -592,9 +584,6 @@ static inline int dma_supported(struct device *dev, u64 mask) if (!ops) return 0; - if (!arch_dma_supported(dev, mask)) - return 0; - if (!ops->dma_supported) return 1; return ops->dma_supported(dev, mask); @@ -839,7 +828,7 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_mmap_writecombine dma_mmap_wc #endif -#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG) +#ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h new file mode 100644 index 000000000000..10b2654d549b --- /dev/null +++ b/include/linux/dma-noncoherent.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_DMA_NONCOHERENT_H +#define _LINUX_DMA_NONCOHERENT_H 1 + +#include <linux/dma-mapping.h> + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs); +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); + +#ifdef CONFIG_DMA_NONCOHERENT_MMAP +int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +#else +#define arch_dma_mmap NULL +#endif /* CONFIG_DMA_NONCOHERENT_MMAP */ + +#ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC +void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction); +#else +#define arch_dma_cache_sync NULL +#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_device(struct device *dev, + phys_addr_t paddr, size_t size, enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_cpu(struct device *dev, + phys_addr_t paddr, size_t size, enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ + +#endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 6d9e230dffd2..a02deea30185 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -218,8 +218,6 @@ extern void elv_unregister(struct elevator_type *); extern ssize_t elv_iosched_show(struct request_queue *, char *); extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); -extern int elevator_init(struct request_queue *, char *); -extern void elevator_exit(struct request_queue *, struct elevator_queue *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, struct elevator_type *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 760d8da1b6c7..5d306028f0a2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -94,7 +94,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond - * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() + * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open() */ /* file is open for reading */ @@ -2570,7 +2570,7 @@ extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK extern void check_disk_size_change(struct gendisk *disk, - struct block_device *bdev); + struct block_device *bdev, bool verbose); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *, bool); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 1a4582b44d32..fc5ab85278d5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -464,7 +464,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON(!node_online(nid)); + VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); return __alloc_pages(gfp_mask, order, nid); } diff --git a/include/linux/ide.h b/include/linux/ide.h index ca9d34feb572..c74b0321922a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -961,7 +961,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) typedef struct { const char *name; umode_t mode; - const struct file_operations *proc_fops; + int (*show)(struct seq_file *, void *); } ide_proc_entry_t; void proc_ide_create(void); @@ -973,8 +973,8 @@ void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, struct ide_driver *); void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *); -extern const struct file_operations ide_capacity_proc_fops; -extern const struct file_operations ide_geometry_proc_fops; +int ide_capacity_proc_show(struct seq_file *m, void *v); +int ide_geometry_proc_show(struct seq_file *m, void *v); #else static inline void proc_ide_create(void) { ; } static inline void proc_ide_destroy(void) { ; } @@ -1508,8 +1508,6 @@ static inline void ide_set_hwifdata (ide_hwif_t * hwif, void *data) hwif->hwif_data = data; } -extern void ide_toggle_bounce(ide_drive_t *drive, int on); - u64 ide_get_lba_addr(struct ide_cmd *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index b9e22b7e2f28..d1171db23742 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -53,7 +53,7 @@ struct iio_buffer_access_funcs { int (*request_update)(struct iio_buffer *buffer); int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd); - int (*set_length)(struct iio_buffer *buffer, int length); + int (*set_length)(struct iio_buffer *buffer, unsigned int length); int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); @@ -72,10 +72,10 @@ struct iio_buffer_access_funcs { */ struct iio_buffer { /** @length: Number of datums in buffer. */ - int length; + unsigned int length; /** @bytes_per_datum: Size of individual datum including timestamp. */ - int bytes_per_datum; + size_t bytes_per_datum; /** * @access: Buffer access functions associated with the diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index cb9a9248c8c0..70d01edcbf8b 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -2,6 +2,7 @@ #ifndef _LINUX_IOMMU_HELPER_H #define _LINUX_IOMMU_HELPER_H +#include <linux/bug.h> #include <linux/kernel.h> static inline unsigned long iommu_device_max_index(unsigned long size, @@ -14,9 +15,15 @@ static inline unsigned long iommu_device_max_index(unsigned long size, return size; } -extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, - unsigned long shift, - unsigned long boundary_size); +static inline int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, unsigned long boundary_size) +{ + BUG_ON(!is_power_of_2(boundary_size)); + + shift = (shift + index) & (boundary_size - 1); + return shift + nr > boundary_size; +} + extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h index 11b57c485854..d75e1ad72964 100644 --- a/include/linux/isdn/capilli.h +++ b/include/linux/isdn/capilli.h @@ -50,7 +50,7 @@ struct capi_ctr { u16 (*send_message)(struct capi_ctr *, struct sk_buff *skb); char *(*procinfo)(struct capi_ctr *); - const struct file_operations *proc_fops; + int (*proc_show)(struct seq_file *, void *); /* filled in before calling ready callback */ u8 manu[CAPI_MANUFACTURER_LEN]; /* CAPI_GET_MANUFACTURER */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 1795fecdea17..1c113134c98f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1133,7 +1133,6 @@ extern int ata_sas_port_start(struct ata_port *ap); extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); -extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); @@ -1359,7 +1358,6 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ - .eh_timed_out = ata_scsi_timed_out, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity, \ .sdev_attrs = ata_common_sdev_attrs diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 6e0859b9d4d2..e9e0d1c7eaf5 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -489,7 +489,7 @@ typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int flags); -typedef void (nvm_tgt_exit_fn)(void *); +typedef void (nvm_tgt_exit_fn)(void *, bool); typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0e49b5b1ee1..2b0265265c28 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -216,6 +216,9 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ diff --git a/include/linux/mempool.h b/include/linux/mempool.h index b51f5c430c26..0c964ac107c2 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -25,6 +25,18 @@ typedef struct mempool_s { wait_queue_head_t wait; } mempool_t; +static inline bool mempool_initialized(mempool_t *pool) +{ + return pool->elements != NULL; +} + +void mempool_exit(mempool_t *pool); +int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, + gfp_t gfp_mask, int node_id); +int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data); + extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, @@ -43,6 +55,14 @@ extern void mempool_free(void *element, mempool_t *pool); */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); + +static inline int +mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) +{ + return mempool_init(pool, min_nr, mempool_alloc_slab, + mempool_free_slab, (void *) kc); +} + static inline mempool_t * mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) { @@ -56,6 +76,13 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); + +static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) +{ + return mempool_init(pool, min_nr, mempool_kmalloc, + mempool_kfree, (void *) size); +} + static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) { return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, @@ -68,6 +95,13 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) */ void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); void mempool_free_pages(void *element, void *pool_data); + +static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) +{ + return mempool_init(pool, min_nr, mempool_alloc_pages, + mempool_free_pages, (void *)(long)order); +} + static inline mempool_t *mempool_create_page_pool(int min_nr, int order) { return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2a156c5dfadd..d703774982ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1286,17 +1286,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - struct irq_desc *desc; - unsigned int irq; - int eqn; - int err; - - err = mlx5_vector2eqn(dev, vector, &eqn, &irq); - if (err) - return NULL; - - desc = irq_to_desc(irq); - return desc->affinity_hint; + return dev->priv.irq_info[vector].mask; } #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index c6fa9a255dbf..02a616e2f17d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2109,7 +2109,6 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); -extern void setup_zone_pageset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; diff --git a/include/linux/node.h b/include/linux/node.h index 41f171861dcc..6d336e38d155 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -32,9 +32,11 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages); +extern int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool check_nid); #else -static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) +static inline int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool check_nid) { return 0; } @@ -57,7 +59,7 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages); + error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true); } return error; diff --git a/include/linux/nospec.h b/include/linux/nospec.h index e791ebc65c9c..0c5ef54fd416 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -7,6 +7,8 @@ #define _LINUX_NOSPEC_H #include <asm/barrier.h> +struct task_struct; + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, \ (typeof(_i)) (_i & _mask); \ }) + +/* Speculation control prctl */ +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); +/* Speculation control for seccomp enforced mitigation */ +void arch_seccomp_spec_mitigate(struct task_struct *task); + #endif /* _LINUX_NOSPEC_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4112e2bd747f..2950ce957656 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -436,10 +436,19 @@ enum { enum { NVME_AER_ERROR = 0, NVME_AER_SMART = 1, + NVME_AER_NOTICE = 2, NVME_AER_CSS = 6, NVME_AER_VS = 7, - NVME_AER_NOTICE_NS_CHANGED = 0x0002, - NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, +}; + +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x00, + NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, +}; + +enum { + NVME_AEN_CFG_NS_ATTR = 1 << 8, + NVME_AEN_CFG_FW_ACT = 1 << 9, }; struct nvme_lba_range_type { @@ -747,6 +756,7 @@ enum { NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_CHANGED_NS = 0x04, NVME_LOG_CMD_EFFECTS = 0x05, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, @@ -755,6 +765,8 @@ enum { NVME_FWACT_ACTV = (2 << 3), }; +#define NVME_MAX_CHANGED_NAMESPACES 1024 + struct nvme_identify { __u8 opcode; __u8 flags; diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8da5a1b31ece..165fd302b442 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -55,7 +55,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } -int of_dma_configure(struct device *dev, struct device_node *np); +int of_dma_configure(struct device *dev, + struct device_node *np, + bool force_dma); void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ @@ -105,7 +107,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return NULL; } -static inline int of_dma_configure(struct device *dev, struct device_node *np) +static inline int of_dma_configure(struct device *dev, + struct device_node *np, + bool force_dma) { return 0; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 73178a2fcee0..55371cb827ad 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -670,7 +670,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val); -#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT typedef u64 pci_bus_addr_t; #else typedef u32 pci_bus_addr_t; diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 93d142ad1528..174601554b06 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -186,7 +186,7 @@ struct pktcdvd_device sector_t current_sector; /* Keep track of where the elevator is */ atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ /* needs to be run. */ - mempool_t *rb_pool; /* mempool for pkt_rb_node allocations */ + mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ struct packet_iosched iosched; struct gendisk *disk; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 49f634d96118..3097c943fab9 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -356,6 +356,8 @@ extern int platform_pm_restore(struct device *dev); #define platform_pm_restore NULL #endif +extern int platform_dma_configure(struct device *dev); + #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ .suspend = platform_pm_suspend, \ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 928ef9e4d912..e518352137e7 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -9,6 +9,8 @@ #include <linux/fs.h> struct proc_dir_entry; +struct seq_file; +struct seq_operations; #ifdef CONFIG_PROC_FS @@ -23,6 +25,19 @@ extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t, extern struct proc_dir_entry *proc_mkdir_mode(const char *, umode_t, struct proc_dir_entry *); struct proc_dir_entry *proc_create_mount_point(const char *name); + +struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data); +#define proc_create_seq_data(name, mode, parent, ops, data) \ + proc_create_seq_private(name, mode, parent, ops, 0, data) +#define proc_create_seq(name, mode, parent, ops) \ + proc_create_seq_private(name, mode, parent, ops, 0, NULL) +struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data); +#define proc_create_single(name, mode, parent, show) \ + proc_create_single_data(name, mode, parent, show, NULL) extern struct proc_dir_entry *proc_create_data(const char *, umode_t, struct proc_dir_entry *, @@ -38,6 +53,15 @@ extern void proc_remove(struct proc_dir_entry *); extern void remove_proc_entry(const char *, struct proc_dir_entry *); extern int remove_proc_subtree(const char *, struct proc_dir_entry *); +struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data); +#define proc_create_net(name, mode, parent, state_size, ops) \ + proc_create_net_data(name, mode, parent, state_size, ops, NULL) +struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data); + #else /* CONFIG_PROC_FS */ static inline void proc_root_init(void) @@ -57,6 +81,11 @@ static inline struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; } static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, struct proc_dir_entry *parent) { return NULL; } +#define proc_create_seq_private(name, mode, parent, ops, size, data) ({NULL;}) +#define proc_create_seq_data(name, mode, parent, ops, data) ({NULL;}) +#define proc_create_seq(name, mode, parent, ops) ({NULL;}) +#define proc_create_single(name, mode, parent, show) ({NULL;}) +#define proc_create_single_data(name, mode, parent, show, data) ({NULL;}) #define proc_create(name, mode, parent, proc_fops) ({NULL;}) #define proc_create_data(name, mode, parent, proc_fops, data) ({NULL;}) @@ -69,6 +98,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {} #define remove_proc_entry(name, parent) do {} while (0) static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; } +#define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;}) +#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;}) +#define proc_create_net_single(name, mode, parent, show, data) ({NULL;}) + #endif /* CONFIG_PROC_FS */ struct net; @@ -83,4 +116,10 @@ struct ns_common; int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)); +/* get the associated pid namespace for a file in procfs */ +static inline struct pid_namespace *proc_pid_ns(struct inode *inode) +{ + return inode->i_sb->s_fs_info; +} + #endif /* _LINUX_PROC_FS_H */ diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 841585f6e5f2..e6539536dea9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -127,6 +127,12 @@ struct sbitmap_queue { * @round_robin: Allocate bits in strict round-robin order. */ bool round_robin; + + /** + * @min_shallow_depth: The minimum shallow depth which may be passed to + * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). + */ + unsigned int min_shallow_depth; }; /** @@ -390,6 +396,9 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq); * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * + * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after + * initializing @sbq. + * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, @@ -424,6 +433,9 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * + * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after + * initializing @sbq. + * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, @@ -439,6 +451,23 @@ static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, } /** + * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the + * minimum shallow depth that will be used. + * @sbq: Bitmap queue in question. + * @min_shallow_depth: The minimum shallow depth that will be passed to + * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). + * + * sbitmap_queue_clear() batches wakeups as an optimization. The batch size + * depends on the depth of the bitmap. Since the shallow allocation functions + * effectively operate with a different depth, the shallow depth must be taken + * into account when calculating the batch size. This function must be called + * with the minimum shallow depth that will be used. Failure to do so can result + * in missed wakeups. + */ +void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, + unsigned int min_shallow_depth); + +/** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. @@ -484,6 +513,13 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); /** + * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue + * on a &struct sbitmap_queue. + * @sbq: Bitmap queue to wake up. + */ +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); + +/** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct * seq_file. * @sbq: Bitmap queue to show. diff --git a/include/linux/sched.h b/include/linux/sched.h index c2413703f45d..ca3f3eae8980 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1433,7 +1433,8 @@ static inline bool is_percpu_thread(void) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ - +#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ +#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1458,6 +1459,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index c723a5c4e3ff..e5320f6c8654 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -4,8 +4,9 @@ #include <uapi/linux/seccomp.h> -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ - SECCOMP_FILTER_FLAG_LOG) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG | \ + SECCOMP_FILTER_FLAG_SPEC_ALLOW) #ifdef CONFIG_SECCOMP diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h index 43ccd84127b6..0fdbe1ddd8d1 100644 --- a/include/linux/seq_file_net.h +++ b/include/linux/seq_file_net.h @@ -13,12 +13,6 @@ struct seq_net_private { #endif }; -int seq_open_net(struct inode *, struct file *, - const struct seq_operations *, int); -int single_open_net(struct inode *, struct file *file, - int (*show)(struct seq_file *, void *)); -int seq_release_net(struct inode *, struct file *); -int single_release_net(struct inode *, struct file *); static inline struct net *seq_file_net(struct seq_file *seq) { #ifdef CONFIG_NET_NS @@ -28,4 +22,17 @@ static inline struct net *seq_file_net(struct seq_file *seq) #endif } +/* + * This one is needed for proc_create_net_single since net is stored directly + * in private not as a struct i.e. seq_file_net can't be used. + */ +static inline struct net *seq_file_single_net(struct seq_file *seq) +{ +#ifdef CONFIG_NET_NS + return (struct net *)seq->private; +#else + return &init_net; +#endif +} + #endif diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a5704daf5df9..e90b9bd99ded 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -122,8 +122,6 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); -extern int rpc_rmdir(struct dentry *dentry); - struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, diff --git a/include/linux/tty.h b/include/linux/tty.h index 1dd587ba6d88..9bd7d37adbfa 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -10,6 +10,7 @@ #include <linux/tty_ldisc.h> #include <linux/mutex.h> #include <linux/tty_flags.h> +#include <linux/seq_file.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> @@ -535,7 +536,7 @@ extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); -extern const struct file_operations tty_ldiscs_proc_fops; +extern const struct seq_operations tty_ldiscs_seq_ops; extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 31c2b5b166de..71dbc891851a 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -293,7 +293,7 @@ struct tty_operations { int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif - const struct file_operations *proc_fops; + int (*proc_show)(struct seq_file *, void *); } __randomize_layout; struct tty_driver { diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d70f77a4b62a..6dad031be3c2 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -46,7 +46,6 @@ struct xattr { size_t value_len; }; -ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); diff --git a/include/net/ax25.h b/include/net/ax25.h index c91bc87931c7..3f9aea8087e3 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -15,6 +15,7 @@ #include <linux/refcount.h> #include <net/neighbour.h> #include <net/sock.h> +#include <linux/seq_file.h> #define AX25_T1CLAMPLO 1 #define AX25_T1CLAMPHI (30 * HZ) @@ -399,7 +400,7 @@ int ax25_check_iframes_acked(ax25_cb *, unsigned short); /* ax25_route.c */ void ax25_rt_device_down(struct net_device *); int ax25_rt_ioctl(unsigned int, void __user *); -extern const struct file_operations ax25_route_fops; +extern const struct seq_operations ax25_rt_seqops; ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev); int ax25_rt_autobind(ax25_cb *, ax25_address *); struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *, @@ -455,7 +456,7 @@ unsigned long ax25_display_timer(struct timer_list *); extern int ax25_uid_policy; ax25_uid_assoc *ax25_findbyuid(kuid_t); int __must_check ax25_uid_ioctl(int, struct sockaddr_ax25 *); -extern const struct file_operations ax25_uid_fops; +extern const struct seq_operations ax25_uid_seqops; void ax25_uid_free(void); /* sysctl_net_ax25.c */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5e86fd9dc857..0e79c3408569 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -394,7 +394,15 @@ void fib6_gc_cleanup(void); int fib6_init(void); -int ipv6_route_open(struct inode *inode, struct file *file); +struct ipv6_route_iter { + struct seq_net_private p; + struct fib6_walker w; + loff_t skip; + struct fib6_table *tbl; + int sernum; +}; + +extern const struct seq_operations ipv6_route_seq_ops; int call_fib6_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index eb0bec043c96..aea7a124e66b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -41,18 +41,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) return net->ipvs; } -/* This one needed for single_open_net since net is stored directly in - * private not as a struct i.e. seq_file_net can't be used. - */ -static inline struct net *seq_file_single_net(struct seq_file *seq) -{ -#ifdef CONFIG_NET_NS - return (struct net *)seq->private; -#else - return &init_net; -#endif -} - /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cd368d1b8cb8..a1e28dd5d0bf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -170,6 +170,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); +void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); @@ -736,6 +737,10 @@ struct nft_expr_ops { int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); + void (*activate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + void (*deactivate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, diff --git a/include/net/netrom.h b/include/net/netrom.h index 0dad2dd5f9d7..5a0714ff500f 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <linux/refcount.h> +#include <linux/seq_file.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 @@ -216,8 +217,8 @@ struct net_device *nr_dev_get(ax25_address *); int nr_rt_ioctl(unsigned int, void __user *); void nr_link_failed(ax25_cb *, int); int nr_route_frame(struct sk_buff *, ax25_cb *); -extern const struct file_operations nr_nodes_fops; -extern const struct file_operations nr_neigh_fops; +extern const struct seq_operations nr_node_seqops; +extern const struct seq_operations nr_neigh_seqops; void nr_rt_free(void); /* nr_subr.c */ diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 8639de5750f6..cbee32be1d9c 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -56,7 +56,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff -extern const struct file_operations pn_sock_seq_fops; -extern const struct file_operations pn_res_seq_fops; +extern const struct seq_operations pn_sock_seq_ops; +extern const struct seq_operations pn_res_seq_ops; #endif diff --git a/include/net/ping.h b/include/net/ping.h index 4cd90d6b5c25..fd080e043a6e 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -83,20 +83,9 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS -struct ping_seq_afinfo { - char *name; - sa_family_t family; - const struct file_operations *seq_fops; - const struct seq_operations seq_ops; -}; - -extern const struct file_operations ping_seq_fops; - void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos); void ping_seq_stop(struct seq_file *seq, void *v); -int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo); -void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo); int __init ping_proc_init(void); void ping_proc_exit(void); diff --git a/include/net/raw.h b/include/net/raw.h index 99d26d0c4a19..9c9fa98a91a4 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -48,7 +48,6 @@ void raw_proc_exit(void); struct raw_iter_state { struct seq_net_private p; int bucket; - struct raw_hashinfo *h; }; static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) @@ -58,9 +57,6 @@ static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -int raw_seq_open(struct inode *ino, struct file *file, - struct raw_hashinfo *h, const struct seq_operations *ops); - #endif int raw_hash_sk(struct sock *sk); diff --git a/include/net/rose.h b/include/net/rose.h index 04b72681f2ab..cf517d306a28 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -200,9 +200,9 @@ void rose_enquiry_response(struct sock *); /* rose_route.c */ extern struct rose_neigh *rose_loopback_neigh; -extern const struct file_operations rose_neigh_fops; -extern const struct file_operations rose_nodes_fops; -extern const struct file_operations rose_routes_fops; +extern const struct seq_operations rose_neigh_seqops; +extern const struct seq_operations rose_node_seqops; +extern struct seq_operations rose_route_seqops; void rose_add_loopback_neigh(void); int __must_check rose_add_loopback_node(rose_address *); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 28b996d63490..35498e613ff5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); /* * sctp/socket.c */ +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 9c9b3768b350..51dc7a26a2fa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1747,27 +1747,22 @@ enum tcp_seq_states { TCP_SEQ_STATE_ESTABLISHED, }; -int tcp_seq_open(struct inode *inode, struct file *file); +void *tcp_seq_start(struct seq_file *seq, loff_t *pos); +void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos); +void tcp_seq_stop(struct seq_file *seq, void *v); struct tcp_seq_afinfo { - char *name; sa_family_t family; - const struct file_operations *seq_fops; - struct seq_operations seq_ops; }; struct tcp_iter_state { struct seq_net_private p; - sa_family_t family; enum tcp_seq_states state; struct sock *syn_wait_sk; int bucket, offset, sbucket, num; loff_t last_pos; }; -int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); -void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); - extern struct request_sock_ops tcp_request_sock_ops; extern struct request_sock_ops tcp6_request_sock_ops; diff --git a/include/net/tls.h b/include/net/tls.h index b400d0bb7448..f5fb16da3860 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -97,6 +97,9 @@ struct tls_sw_context { u8 control; bool decrypted; + char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE]; + char rx_aad_plaintext[TLS_AAD_SPACE_SIZE]; + /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; diff --git a/include/net/udp.h b/include/net/udp.h index 0676b272f6ac..621778b80e3d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -408,31 +408,27 @@ do { \ #define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) #endif -/* /proc */ -int udp_seq_open(struct inode *inode, struct file *file); - +#ifdef CONFIG_PROC_FS struct udp_seq_afinfo { - char *name; sa_family_t family; struct udp_table *udp_table; - const struct file_operations *seq_fops; - struct seq_operations seq_ops; }; struct udp_iter_state { struct seq_net_private p; - sa_family_t family; int bucket; - struct udp_table *udp_table; }; -#ifdef CONFIG_PROC_FS -int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo); -void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo); +void *udp_seq_start(struct seq_file *seq, loff_t *pos); +void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); +void udp_seq_stop(struct seq_file *seq, void *v); + +extern const struct seq_operations udp_seq_ops; +extern const struct seq_operations udp6_seq_ops; int udp4_proc_init(void); void udp4_proc_exit(void); -#endif +#endif /* CONFIG_PROC_FS */ int udpv4_offload_init(void); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 23159dd5be18..a1fd63871d17 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,7 +48,6 @@ struct ib_umem { int writable; int hugetlb; struct work_struct work; - struct pid *pid; struct mm_struct *mm; unsigned long diff; struct ib_umem_odp *odp_data; diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 4a4201d997a7..095383a4bd1a 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { - struct ib_uobject *uobj = - uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject; + const struct uverbs_attr *attr; - if (IS_ERR(uobj)) - return uobj; + attr = uverbs_attr_get(attrs_bundle, idx); + if (IS_ERR(attr)) + return ERR_CAST(attr); - return uobj->object; + return attr->obj_attr.uobject->object; } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index a29d3086eb56..86a569d008b2 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -148,7 +148,6 @@ struct osd_request { u8 *pad_buff; } out, in; - gfp_t alloc_flags; unsigned timeout; unsigned retries; unsigned sense_len; @@ -202,14 +201,11 @@ static inline bool osd_req_is_ver1(struct osd_request *or) * * @osd_dev: OSD device that holds the scsi-device and default values * that the request is associated with. - * @gfp: The allocation flags to use for request allocation, and all - * subsequent allocations. This will be stored at - * osd_request->alloc_flags, can be changed by user later * * Allocate osd_request and initialize all members to the * default/initial state. */ -struct osd_request *osd_start_request(struct osd_dev *od, gfp_t gfp); +struct osd_request *osd_start_request(struct osd_dev *od); enum osd_req_options { OSD_REQ_FUA = 0x08, /* Force Unit Access */ diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 12f454cb6f61..53b485fe9b67 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -307,7 +307,7 @@ struct scsi_host_template { * EH_HANDLED: I fixed the error, please complete the command * EH_RESET_TIMER: I need more time, reset the timer and * begin counting again - * EH_NOT_HANDLED Begin normal error recovery + * EH_DONE: Begin normal error recovery * * Status: OPTIONAL */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index bc01e06bc716..0be866c91f62 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -435,7 +435,9 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = pi_task ? pi_task->prio : tsk->prio; + __entry->newprio = pi_task ? + min(tsk->normal_prio, pi_task->prio) : + tsk->normal_prio; /* XXX SCHED_DEADLINE bits missing */ ), diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c5ec89732a8d..8c317737ba3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1017,6 +1017,7 @@ struct bpf_prog_info { __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); @@ -1030,6 +1031,7 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h index 74b91151d494..bcba72def817 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h @@ -46,6 +46,9 @@ enum tcp_conntrack { /* Marks possibility for expected RFC5961 challenge ACK */ #define IP_CT_EXP_CHALLENGE_ACK 0x40 +/* Simultaneous open initialized */ +#define IP_CT_TCP_SIMULTANEOUS_OPEN 0x80 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9c3630146cec..271b93783d28 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2698,7 +2698,7 @@ enum nl80211_attrs { #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS -#define NL80211_WIPHY_NAME_MAXLEN 128 +#define NL80211_WIPHY_NAME_MAXLEN 64 #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h index b19a9c249b15..784c2e3e572e 100644 --- a/include/uapi/linux/ppp-ioctl.h +++ b/include/uapi/linux/ppp-ioctl.h @@ -106,7 +106,7 @@ struct pppol2tp_ioc_stats { #define PPPIOCGIDLE _IOR('t', 63, struct ppp_idle) /* get idle time */ #define PPPIOCNEWUNIT _IOWR('t', 62, int) /* create new ppp unit */ #define PPPIOCATTACH _IOW('t', 61, int) /* attach to ppp unit */ -#define PPPIOCDETACH _IOW('t', 60, int) /* detach from ppp unit/chan */ +#define PPPIOCDETACH _IOW('t', 60, int) /* obsolete, do not use */ #define PPPIOCSMRRU _IOW('t', 59, int) /* set multilink MRU */ #define PPPIOCCONNECT _IOW('t', 58, int) /* connect channel to unit */ #define PPPIOCDISCONN _IO('t', 57) /* disconnect channel */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index af5f8c2df87a..db9f15f5db04 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -207,4 +207,16 @@ struct prctl_mm_map { # define PR_SVE_VL_LEN_MASK 0xffff # define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) + #endif /* _LINUX_PRCTL_H */ diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 2a0bd9dd104d..9efc0e73d50b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -17,8 +17,9 @@ #define SECCOMP_GET_ACTION_AVAIL 2 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC 1 -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) /* * All BPF programs must return a 32-bit value. diff --git a/init/main.c b/init/main.c index fd37315835b4..3b4ada11ed52 100644 --- a/init/main.c +++ b/init/main.c @@ -91,6 +91,7 @@ #include <linux/cache.h> #include <linux/rodata_test.h> #include <linux/jump_label.h> +#include <linux/mem_encrypt.h> #include <asm/io.h> #include <asm/bugs.h> diff --git a/ipc/shm.c b/ipc/shm.c index 3cf48988d68c..d73269381ec7 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1363,14 +1363,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, if (addr) { if (addr & (shmlba - 1)) { - /* - * Round down to the nearest multiple of shmlba. - * For sane do_mmap_pgoff() parameters, avoid - * round downs that trigger nil-page and MAP_FIXED. - */ - if ((shmflg & SHM_RND) && addr >= shmlba) - addr &= ~(shmlba - 1); - else + if (shmflg & SHM_RND) { + addr &= ~(shmlba - 1); /* round down */ + + /* + * Ensure that the round-down is non-nil + * when remapping. This can happen for + * cases when addr < shmlba. + */ + if (!addr && (shmflg & SHM_REMAP)) + goto out; + } else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) #endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba03ec39efb3..6ef6746a7871 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -218,47 +218,84 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) return 0; } -static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) +static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, + u32 curr, const bool probe_pass) { + const s64 imm_min = S32_MIN, imm_max = S32_MAX; + s64 imm = insn->imm; + + if (curr < pos && curr + imm + 1 > pos) + imm += delta; + else if (curr > pos + delta && curr + imm + 1 <= pos + delta) + imm -= delta; + if (imm < imm_min || imm > imm_max) + return -ERANGE; + if (!probe_pass) + insn->imm = imm; + return 0; +} + +static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, + u32 curr, const bool probe_pass) +{ + const s32 off_min = S16_MIN, off_max = S16_MAX; + s32 off = insn->off; + + if (curr < pos && curr + off + 1 > pos) + off += delta; + else if (curr > pos + delta && curr + off + 1 <= pos + delta) + off -= delta; + if (off < off_min || off > off_max) + return -ERANGE; + if (!probe_pass) + insn->off = off; + return 0; +} + +static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, + const bool probe_pass) +{ + u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0); struct bpf_insn *insn = prog->insnsi; - u32 i, insn_cnt = prog->len; - bool pseudo_call; - u8 code; - int off; + int ret = 0; for (i = 0; i < insn_cnt; i++, insn++) { + u8 code; + + /* In the probing pass we still operate on the original, + * unpatched image in order to check overflows before we + * do any other adjustments. Therefore skip the patchlet. + */ + if (probe_pass && i == pos) { + i += delta + 1; + insn++; + } code = insn->code; - if (BPF_CLASS(code) != BPF_JMP) - continue; - if (BPF_OP(code) == BPF_EXIT) + if (BPF_CLASS(code) != BPF_JMP || + BPF_OP(code) == BPF_EXIT) continue; + /* Adjust offset of jmps if we cross patch boundaries. */ if (BPF_OP(code) == BPF_CALL) { - if (insn->src_reg == BPF_PSEUDO_CALL) - pseudo_call = true; - else + if (insn->src_reg != BPF_PSEUDO_CALL) continue; + ret = bpf_adj_delta_to_imm(insn, pos, delta, i, + probe_pass); } else { - pseudo_call = false; + ret = bpf_adj_delta_to_off(insn, pos, delta, i, + probe_pass); } - off = pseudo_call ? insn->imm : insn->off; - - /* Adjust offset of jmps if we cross boundaries. */ - if (i < pos && i + off + 1 > pos) - off += delta; - else if (i > pos + delta && i + off + 1 <= pos + delta) - off -= delta; - - if (pseudo_call) - insn->imm = off; - else - insn->off = off; + if (ret) + break; } + + return ret; } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len) { u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; + const u32 cnt_max = S16_MAX; struct bpf_prog *prog_adj; /* Since our patchlet doesn't expand the image, we're done. */ @@ -269,6 +306,15 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, insn_adj_cnt = prog->len + insn_delta; + /* Reject anything that would potentially let the insn->off + * target overflow when we have excessive program expansions. + * We need to probe here before we do any reallocation where + * we afterwards may not fail anymore. + */ + if (insn_adj_cnt > cnt_max && + bpf_adj_branches(prog, off, insn_delta, true)) + return NULL; + /* Several new instructions need to be inserted. Make room * for them. Likely, there's no need for a new allocation as * last page could have large enough tailroom. @@ -294,7 +340,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, sizeof(*patch) * insn_rest); memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); - bpf_adj_branches(prog_adj, off, insn_delta); + /* We are guaranteed to not fail at this point, otherwise + * the ship has sailed to reverse to the original state. An + * overflow cannot happen at this point. + */ + BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false)); return prog_adj; } diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 098eca568c2b..95a84b2f10ce 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1703,11 +1703,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, * we increment the refcnt. If this is the case abort with an * error. */ - verdict = bpf_prog_inc_not_zero(stab->bpf_verdict); + verdict = bpf_prog_inc_not_zero(verdict); if (IS_ERR(verdict)) return PTR_ERR(verdict); - parse = bpf_prog_inc_not_zero(stab->bpf_parse); + parse = bpf_prog_inc_not_zero(parse); if (IS_ERR(parse)) { bpf_prog_put(verdict); return PTR_ERR(parse); @@ -1715,12 +1715,12 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, } if (tx_msg) { - tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg); + tx_msg = bpf_prog_inc_not_zero(tx_msg); if (IS_ERR(tx_msg)) { - if (verdict) - bpf_prog_put(verdict); - if (parse) + if (parse && verdict) { bpf_prog_put(parse); + bpf_prog_put(verdict); + } return PTR_ERR(tx_msg); } } @@ -1805,10 +1805,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, out_free: smap_release_sock(psock, sock); out_progs: - if (verdict) - bpf_prog_put(verdict); - if (parse) + if (parse && verdict) { bpf_prog_put(parse); + bpf_prog_put(verdict); + } if (tx_msg) bpf_prog_put(tx_msg); write_unlock_bh(&sock->sk_callback_lock); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5dd1dcb902bf..1904e814f282 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -156,7 +156,29 @@ struct bpf_verifier_stack_elem { #define BPF_COMPLEXITY_LIMIT_INSNS 131072 #define BPF_COMPLEXITY_LIMIT_STACK 1024 -#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA) +#define BPF_MAP_PTR_UNPRIV 1UL +#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ + POISON_POINTER_DELTA)) +#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) + +static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) +{ + return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON; +} + +static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) +{ + return aux->map_state & BPF_MAP_PTR_UNPRIV; +} + +static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, + const struct bpf_map *map, bool unpriv) +{ + BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); + unpriv |= bpf_map_ptr_unpriv(aux); + aux->map_state = (unsigned long)map | + (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); +} struct bpf_call_arg_meta { struct bpf_map *map_ptr; @@ -978,7 +1000,7 @@ static bool register_is_null(struct bpf_reg_state *reg) */ static int check_stack_write(struct bpf_verifier_env *env, struct bpf_func_state *state, /* func where register points to */ - int off, int size, int value_regno) + int off, int size, int value_regno, int insn_idx) { struct bpf_func_state *cur; /* state of the current function */ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; @@ -1017,8 +1039,33 @@ static int check_stack_write(struct bpf_verifier_env *env, state->stack[spi].spilled_ptr = cur->regs[value_regno]; state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; - for (i = 0; i < BPF_REG_SIZE; i++) + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; + + /* detected reuse of integer stack slot with a pointer + * which means either llvm is reusing stack slot or + * an attacker is trying to exploit CVE-2018-3639 + * (speculative store bypass) + * Have to sanitize that slot with preemptive + * store of zero. + */ + if (*poff && *poff != soff) { + /* disallow programs where single insn stores + * into two different stack slots, since verifier + * cannot sanitize them + */ + verbose(env, + "insn %d cannot access two stack slots fp%d and fp%d", + insn_idx, *poff, soff); + return -EINVAL; + } + *poff = soff; + } state->stack[spi].slot_type[i] = STACK_SPILL; + } } else { u8 type = STACK_MISC; @@ -1694,7 +1741,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (t == BPF_WRITE) err = check_stack_write(env, state, off, size, - value_regno); + value_regno, insn_idx); else err = check_stack_read(env, state, off, size, value_regno); @@ -2333,6 +2380,29 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } +static int +record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, + int func_id, int insn_idx) +{ + struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; + + if (func_id != BPF_FUNC_tail_call && + func_id != BPF_FUNC_map_lookup_elem) + return 0; + if (meta->map_ptr == NULL) { + verbose(env, "kernel subsystem misconfigured verifier\n"); + return -EINVAL; + } + + if (!BPF_MAP_PTR(aux->map_state)) + bpf_map_ptr_store(aux, meta->map_ptr, + meta->map_ptr->unpriv_array); + else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr) + bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, + meta->map_ptr->unpriv_array); + return 0; +} + static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { const struct bpf_func_proto *fn = NULL; @@ -2387,13 +2457,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; - if (func_id == BPF_FUNC_tail_call) { - if (meta.map_ptr == NULL) { - verbose(env, "verifier bug\n"); - return -EINVAL; - } - env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; - } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@ -2404,6 +2467,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn if (err) return err; + err = record_func_map(env, &meta, func_id, insn_idx); + if (err) + return err; + /* Mark slots with STACK_MISC in case of raw mode, stack offset * is inferred from register state. */ @@ -2428,8 +2495,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { - struct bpf_insn_aux_data *insn_aux; - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); @@ -2445,11 +2510,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; - insn_aux = &env->insn_aux_data[insn_idx]; - if (!insn_aux->map_ptr) - insn_aux->map_ptr = meta.map_ptr; - else if (insn_aux->map_ptr != meta.map_ptr) - insn_aux->map_ptr = BPF_MAP_PTR_POISON; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -5169,6 +5229,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; + if (type == BPF_WRITE && + env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + /* Sanitize suspicious stack slot with zero. + * There are no memory dependencies for this store, + * since it's only using frame pointer and immediate + * constant of zero + */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, + env->insn_aux_data[i + delta].sanitize_stack_off, + 0), + /* the original STX instruction will immediately + * overwrite the same stack slot with appropriate value + */ + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; @@ -5417,6 +5505,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; + struct bpf_insn_aux_data *aux; struct bpf_insn insn_buf[16]; struct bpf_prog *new_prog; struct bpf_map *map_ptr; @@ -5491,19 +5580,22 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn->imm = 0; insn->code = BPF_JMP | BPF_TAIL_CALL; + aux = &env->insn_aux_data[i + delta]; + if (!bpf_map_ptr_unpriv(aux)) + continue; + /* instead of changing every JIT dealing with tail_call * emit two extra insns: * if (index >= max_entries) goto out; * index &= array->index_mask; * to avoid out-of-bounds cpu speculation */ - map_ptr = env->insn_aux_data[i + delta].map_ptr; - if (map_ptr == BPF_MAP_PTR_POISON) { + if (bpf_map_ptr_poisoned(aux)) { verbose(env, "tail_call abusing map_ptr\n"); return -EINVAL; } - if (!map_ptr->unpriv_array) - continue; + + map_ptr = BPF_MAP_PTR(aux->map_state); insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 2); insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, @@ -5527,9 +5619,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_map_lookup_elem) { - map_ptr = env->insn_aux_data[i + delta].map_ptr; - if (map_ptr == BPF_MAP_PTR_POISON || - !map_ptr->ops->map_gen_lookup) + aux = &env->insn_aux_data[i + delta]; + if (bpf_map_ptr_poisoned(aux)) + goto patch_call_imm; + + map_ptr = BPF_MAP_PTR(aux->map_state); + if (!map_ptr->ops->map_gen_lookup) goto patch_call_imm; cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf); diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index b928b27050c6..0808a33d16d3 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -218,9 +218,9 @@ extern const struct proc_ns_operations cgroupns_operations; * cgroup-v1.c */ extern struct cftype cgroup1_base_files[]; -extern const struct file_operations proc_cgroupstats_operations; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; +int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index a2c05d2476ac..e06c97f3ed1a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -682,7 +682,7 @@ struct cftype cgroup1_base_files[] = { }; /* Display information about each subsystem and each hierarchy */ -static int proc_cgroupstats_show(struct seq_file *m, void *v) +int proc_cgroupstats_show(struct seq_file *m, void *v) { struct cgroup_subsys *ss; int i; @@ -705,18 +705,6 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) return 0; } -static int cgroupstats_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_cgroupstats_show, NULL); -} - -const struct file_operations proc_cgroupstats_operations = { - .open = cgroupstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /** * cgroupstats_build - build and fill cgroupstats * @stats: cgroupstats to fill information into diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a662bfcbea0e..12883656e63e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5335,7 +5335,7 @@ int __init cgroup_init(void) WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup")); WARN_ON(register_filesystem(&cgroup_fs_type)); WARN_ON(register_filesystem(&cgroup2_fs_type)); - WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations)); + WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show)); return 0; } diff --git a/kernel/dma.c b/kernel/dma.c index 3506fc34a712..40f152936316 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -135,21 +135,9 @@ static int proc_dma_show(struct seq_file *m, void *v) } #endif /* MAX_DMA_CHANNELS */ -static int proc_dma_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_dma_show, NULL); -} - -static const struct file_operations proc_dma_operations = { - .open = proc_dma_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_dma_init(void) { - proc_create("dma", 0, NULL, &proc_dma_operations); + proc_create_single("dma", 0, NULL, proc_dma_show); return 0; } diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index a5697119290e..33f07c5f2515 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -27,21 +27,9 @@ static int execdomains_proc_show(struct seq_file *m, void *v) return 0; } -static int execdomains_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, execdomains_proc_show, NULL); -} - -static const struct file_operations execdomains_proc_fops = { - .open = execdomains_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_execdomains_init(void) { - proc_create("execdomains", 0, NULL, &execdomains_proc_fops); + proc_create_single("execdomains", 0, NULL, execdomains_proc_show); return 0; } module_init(proc_execdomains_init); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 7cb091d81d91..37eda10f5c36 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -185,11 +185,6 @@ static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode)); } -static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, irq_affinity_hint_proc_show, PDE_DATA(inode)); -} - static const struct file_operations irq_affinity_proc_fops = { .open = irq_affinity_proc_open, .read = seq_read, @@ -198,13 +193,6 @@ static const struct file_operations irq_affinity_proc_fops = { .write = irq_affinity_proc_write, }; -static const struct file_operations irq_affinity_hint_proc_fops = { - .open = irq_affinity_hint_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct file_operations irq_affinity_list_proc_fops = { .open = irq_affinity_list_proc_open, .read = seq_read, @@ -223,32 +211,6 @@ static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v) { return show_irq_affinity(EFFECTIVE_LIST, m); } - -static int irq_effective_aff_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode)); -} - -static int irq_effective_aff_list_proc_open(struct inode *inode, - struct file *file) -{ - return single_open(file, irq_effective_aff_list_proc_show, - PDE_DATA(inode)); -} - -static const struct file_operations irq_effective_aff_proc_fops = { - .open = irq_effective_aff_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations irq_effective_aff_list_proc_fops = { - .open = irq_effective_aff_list_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif static int default_affinity_show(struct seq_file *m, void *v) @@ -313,18 +275,6 @@ static int irq_node_proc_show(struct seq_file *m, void *v) seq_printf(m, "%d\n", irq_desc_get_node(desc)); return 0; } - -static int irq_node_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, irq_node_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations irq_node_proc_fops = { - .open = irq_node_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif static int irq_spurious_proc_show(struct seq_file *m, void *v) @@ -337,18 +287,6 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v) return 0; } -static int irq_spurious_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, irq_spurious_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations irq_spurious_proc_fops = { - .open = irq_spurious_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #define MAX_NAMELEN 128 static int name_unique(unsigned int irq, struct irqaction *new_action) @@ -421,24 +359,24 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) &irq_affinity_proc_fops, irqp); /* create /proc/irq/<irq>/affinity_hint */ - proc_create_data("affinity_hint", 0444, desc->dir, - &irq_affinity_hint_proc_fops, irqp); + proc_create_single_data("affinity_hint", 0444, desc->dir, + irq_affinity_hint_proc_show, irqp); /* create /proc/irq/<irq>/smp_affinity_list */ proc_create_data("smp_affinity_list", 0644, desc->dir, &irq_affinity_list_proc_fops, irqp); - proc_create_data("node", 0444, desc->dir, - &irq_node_proc_fops, irqp); + proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, + irqp); # ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK - proc_create_data("effective_affinity", 0444, desc->dir, - &irq_effective_aff_proc_fops, irqp); - proc_create_data("effective_affinity_list", 0444, desc->dir, - &irq_effective_aff_list_proc_fops, irqp); + proc_create_single_data("effective_affinity", 0444, desc->dir, + irq_effective_aff_proc_show, irqp); + proc_create_single_data("effective_affinity_list", 0444, desc->dir, + irq_effective_aff_list_proc_show, irqp); # endif #endif - proc_create_data("spurious", 0444, desc->dir, - &irq_spurious_proc_fops, (void *)(long)irq); + proc_create_single_data("spurious", 0444, desc->dir, + irq_spurious_proc_show, (void *)(long)irq); out_unlock: mutex_unlock(®ister_lock); diff --git a/kernel/kthread.c b/kernel/kthread.c index 2017a39ab490..481951bf091d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -193,7 +193,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); void kthread_park_complete(struct task_struct *k) { - complete(&to_kthread(k)->parked); + complete_all(&to_kthread(k)->parked); } static int kthread(void *_create) @@ -459,6 +459,7 @@ void kthread_unpark(struct task_struct *k) if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) __kthread_bind(k, kthread->cpu, TASK_PARKED); + reinit_completion(&kthread->parked); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); wake_up_state(k, TASK_PARKED); } @@ -483,9 +484,6 @@ int kthread_park(struct task_struct *k) if (WARN_ON(k->flags & PF_EXITING)) return -ENOSYS; - if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))) - return -EBUSY; - set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index ad69bbc9bd28..3dd980dfba2d 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -101,18 +101,6 @@ static const struct seq_operations lockdep_ops = { .show = l_show, }; -static int lockdep_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &lockdep_ops); -} - -static const struct file_operations proc_lockdep_operations = { - .open = lockdep_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #ifdef CONFIG_PROVE_LOCKING static void *lc_start(struct seq_file *m, loff_t *pos) { @@ -170,18 +158,6 @@ static const struct seq_operations lockdep_chains_ops = { .stop = lc_stop, .show = lc_show, }; - -static int lockdep_chains_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &lockdep_chains_ops); -} - -static const struct file_operations proc_lockdep_chains_operations = { - .open = lockdep_chains_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif /* CONFIG_PROVE_LOCKING */ static void lockdep_stats_debug_show(struct seq_file *m) @@ -355,18 +331,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v) return 0; } -static int lockdep_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, lockdep_stats_show, NULL); -} - -static const struct file_operations proc_lockdep_stats_operations = { - .open = lockdep_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #ifdef CONFIG_LOCK_STAT struct lock_stat_data { @@ -682,14 +646,11 @@ static const struct file_operations proc_lock_stat_operations = { static int __init lockdep_proc_init(void) { - proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); + proc_create_seq("lockdep", S_IRUSR, NULL, &lockdep_ops); #ifdef CONFIG_PROVE_LOCKING - proc_create("lockdep_chains", S_IRUSR, NULL, - &proc_lockdep_chains_operations); + proc_create_seq("lockdep_chains", S_IRUSR, NULL, &lockdep_chains_ops); #endif - proc_create("lockdep_stats", S_IRUSR, NULL, - &proc_lockdep_stats_operations); - + proc_create_single("lockdep_stats", S_IRUSR, NULL, lockdep_stats_show); #ifdef CONFIG_LOCK_STAT proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, &proc_lock_stat_operations); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 11b4282c2d20..1efcb5b0c3ed 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -269,7 +269,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, struct bio *bio; int error = 0; - bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1); + bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); bio_set_dev(bio, hib_resume_bdev); bio_set_op_attrs(bio, op, op_flags); @@ -376,7 +376,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) return -ENOSPC; if (hb) { - src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN | + src = (void *)__get_free_page(GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (src) { copy_page(src, buf); @@ -384,7 +384,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) ret = hib_wait_io(hb); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_RECLAIM | + src = (void *)__get_free_page(GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (src) { @@ -691,7 +691,7 @@ static int save_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH); + page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH); if (!page) { pr_err("Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -989,7 +989,7 @@ static int get_swap_reader(struct swap_map_handle *handle, last = tmp; tmp->map = (struct swap_map_page *) - __get_free_page(__GFP_RECLAIM | __GFP_HIGH); + __get_free_page(GFP_NOIO | __GFP_HIGH); if (!tmp->map) { release_swap_reader(handle); return -ENOMEM; @@ -1261,8 +1261,8 @@ static int load_image_lzo(struct swap_map_handle *handle, for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? - __GFP_RECLAIM | __GFP_HIGH : - __GFP_RECLAIM | __GFP_NOWARN | + GFP_NOIO | __GFP_HIGH : + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!page[i]) { diff --git a/kernel/resource.c b/kernel/resource.c index 2af6c03858b9..b589dda910b3 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -87,7 +87,7 @@ enum { MAX_IORES_LEVEL = 5 }; static void *r_start(struct seq_file *m, loff_t *pos) __acquires(resource_lock) { - struct resource *p = m->private; + struct resource *p = PDE_DATA(file_inode(m->file)); loff_t l = 0; read_lock(&resource_lock); for (p = p->child; p && l < *pos; p = r_next(m, p, &l)) @@ -103,7 +103,7 @@ static void r_stop(struct seq_file *m, void *v) static int r_show(struct seq_file *m, void *v) { - struct resource *root = m->private; + struct resource *root = PDE_DATA(file_inode(m->file)); struct resource *r = v, *p; unsigned long long start, end; int width = root->end < 0x10000 ? 4 : 8; @@ -135,44 +135,11 @@ static const struct seq_operations resource_op = { .show = r_show, }; -static int ioports_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &resource_op); - if (!res) { - struct seq_file *m = file->private_data; - m->private = &ioport_resource; - } - return res; -} - -static int iomem_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &resource_op); - if (!res) { - struct seq_file *m = file->private_data; - m->private = &iomem_resource; - } - return res; -} - -static const struct file_operations proc_ioports_operations = { - .open = ioports_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations proc_iomem_operations = { - .open = iomem_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init ioresources_init(void) { - proc_create("ioports", 0, NULL, &proc_ioports_operations); - proc_create("iomem", 0, NULL, &proc_iomem_operations); + proc_create_seq_data("ioports", 0, NULL, &resource_op, + &ioport_resource); + proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource); return 0; } __initcall(ioresources_init); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 092f7c4de903..211890edf37e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -881,6 +881,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) } #ifdef CONFIG_SMP + +static inline bool is_per_cpu_kthread(struct task_struct *p) +{ + if (!(p->flags & PF_KTHREAD)) + return false; + + if (p->nr_cpus_allowed != 1) + return false; + + return true; +} + +/* + * Per-CPU kthreads are allowed to run on !actie && online CPUs, see + * __set_cpus_allowed_ptr() and select_fallback_rq(). + */ +static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +{ + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + return false; + + if (is_per_cpu_kthread(p)) + return cpu_online(cpu); + + return cpu_active(cpu); +} + /* * This is how migration works: * @@ -938,16 +965,8 @@ struct migration_arg { static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, struct task_struct *p, int dest_cpu) { - if (p->flags & PF_KTHREAD) { - if (unlikely(!cpu_online(dest_cpu))) - return rq; - } else { - if (unlikely(!cpu_active(dest_cpu))) - return rq; - } - /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!is_cpu_allowed(p, dest_cpu)) return rq; update_rq_clock(rq); @@ -1476,10 +1495,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for (;;) { /* Any allowed, online CPU? */ for_each_cpu(dest_cpu, &p->cpus_allowed) { - if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) - continue; - if (!cpu_online(dest_cpu)) + if (!is_cpu_allowed(p, dest_cpu)) continue; + goto out; } @@ -1542,8 +1560,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || - !cpu_online(cpu))) + if (unlikely(!is_cpu_allowed(p, cpu))) cpu = select_fallback_rq(task_cpu(p), p); return cpu; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1356afd1eeb6..fbfc3f1d368a 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1259,6 +1259,9 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) rq = task_rq_lock(p, &rf); + sched_clock_tick(); + update_rq_clock(rq); + if (!dl_task(p) || p->state == TASK_DEAD) { struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); @@ -1278,9 +1281,6 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) if (dl_se->dl_non_contending == 0) goto unlock; - sched_clock_tick(); - update_rq_clock(rq); - sub_running_bw(dl_se, &rq->dl); dl_se->dl_non_contending = 0; unlock: diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 15b10e210a6b..e593b4118578 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -823,35 +823,9 @@ static const struct seq_operations sched_debug_sops = { .show = sched_debug_show, }; -static int sched_debug_release(struct inode *inode, struct file *file) -{ - seq_release(inode, file); - - return 0; -} - -static int sched_debug_open(struct inode *inode, struct file *filp) -{ - int ret = 0; - - ret = seq_open(filp, &sched_debug_sops); - - return ret; -} - -static const struct file_operations sched_debug_fops = { - .open = sched_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = sched_debug_release, -}; - static int __init init_sched_debug_procfs(void) { - struct proc_dir_entry *pe; - - pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops); - if (!pe) + if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops)) return -ENOMEM; return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f0a4bc6a39d..cb467c221b15 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -983,7 +983,7 @@ static inline void rq_clock_skip_update(struct rq *rq) } /* - * See rt task throttoling, which is the only time a skip + * See rt task throttling, which is the only time a skip * request is cancelled. */ static inline void rq_clock_cancel_skipupdate(struct rq *rq) diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index ab112cbfd7c8..750fb3c67eed 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -120,22 +120,9 @@ static const struct seq_operations schedstat_sops = { .show = show_schedstat, }; -static int schedstat_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &schedstat_sops); -} - -static const struct file_operations proc_schedstat_operations = { - .open = schedstat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_schedstat_init(void) { - proc_create("schedstat", 0, NULL, &proc_schedstat_operations); - + proc_create_seq("schedstat", 0, NULL, &schedstat_sops); return 0; } subsys_initcall(proc_schedstat_init); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 64cc564f5255..61a1125c1ae4 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1708,7 +1708,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att rcu_read_unlock(); if (rq && sched_debug_enabled) { - pr_info("span: %*pbl (max cpu_capacity = %lu)\n", + pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index dc77548167ef..e691d9a6c58d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -19,6 +19,8 @@ #include <linux/compat.h> #include <linux/coredump.h> #include <linux/kmemleak.h> +#include <linux/nospec.h> +#include <linux/prctl.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/seccomp.h> @@ -227,8 +229,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } +void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } + static inline void seccomp_assign_mode(struct task_struct *task, - unsigned long seccomp_mode) + unsigned long seccomp_mode, + unsigned long flags) { assert_spin_locked(&task->sighand->siglock); @@ -238,6 +243,9 @@ static inline void seccomp_assign_mode(struct task_struct *task, * filter) is set. */ smp_mb__before_atomic(); + /* Assume default seccomp processes want spec flaw mitigation. */ + if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) + arch_seccomp_spec_mitigate(task); set_tsk_thread_flag(task, TIF_SECCOMP); } @@ -305,7 +313,7 @@ static inline pid_t seccomp_can_sync_threads(void) * without dropping the locks. * */ -static inline void seccomp_sync_threads(void) +static inline void seccomp_sync_threads(unsigned long flags) { struct task_struct *thread, *caller; @@ -346,7 +354,8 @@ static inline void seccomp_sync_threads(void) * allow one thread to transition the other. */ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) - seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER, + flags); } } @@ -469,7 +478,7 @@ static long seccomp_attach_filter(unsigned int flags, /* Now that the new filter is in place, synchronize to all threads. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC) - seccomp_sync_threads(); + seccomp_sync_threads(flags); return 0; } @@ -818,7 +827,7 @@ static long seccomp_set_mode_strict(void) #ifdef TIF_NOTSC disable_TSC(); #endif - seccomp_assign_mode(current, seccomp_mode); + seccomp_assign_mode(current, seccomp_mode, 0); ret = 0; out: @@ -876,7 +885,7 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Do not free the successfully attached filter. */ prepared = NULL; - seccomp_assign_mode(current, seccomp_mode); + seccomp_assign_mode(current, seccomp_mode, flags); out: spin_unlock_irq(¤t->sighand->siglock); if (flags & SECCOMP_FILTER_FLAG_TSYNC) diff --git a/kernel/sys.c b/kernel/sys.c index ad692183dfe9..d1b2b8d934bb 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -61,6 +61,8 @@ #include <linux/uidgid.h> #include <linux/cred.h> +#include <linux/nospec.h> + #include <linux/kmsg_dump.h> /* Move somewhere else to avoid recompiling? */ #include <generated/utsrelease.h> @@ -69,6 +71,9 @@ #include <asm/io.h> #include <asm/unistd.h> +/* Hardening for Spectre-v1 */ +#include <linux/nospec.h> + #include "uid16.h" #ifndef SET_UNALIGN_CTL @@ -1451,6 +1456,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); task_lock(current->group_leader); x = current->signal->rlim[resource]; task_unlock(current->group_leader); @@ -1470,6 +1476,7 @@ COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); task_lock(current->group_leader); r = current->signal->rlim[resource]; task_unlock(current->group_leader); @@ -2242,6 +2249,17 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data) return 1; } +int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) +{ + return -EINVAL; +} + +int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, + unsigned long ctrl) +{ + return -EINVAL; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2450,6 +2468,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SVE_GET_VL: error = SVE_GET_VL(); break; + case PR_GET_SPECULATION_CTRL: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_get(me, arg2); + break; + case PR_SET_SPECULATION_CTRL: + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_spec_ctrl_set(me, arg2, arg3); + break; default: error = -EINVAL; break; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 0ed768b56c60..675c4e9563a9 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -372,24 +372,12 @@ static const struct seq_operations timer_list_sops = { .show = timer_list_show, }; -static int timer_list_open(struct inode *inode, struct file *filp) -{ - return seq_open_private(filp, &timer_list_sops, - sizeof(struct timer_list_iter)); -} - -static const struct file_operations timer_list_fops = { - .open = timer_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_list", 0400, NULL, &timer_list_fops); + pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops, + sizeof(struct timer_list_iter), NULL); if (!pe) return -ENOMEM; return 0; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 414d7210b2ec..bcd93031d042 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -893,7 +893,7 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -static void tracing_snapshot_instance(struct trace_array *tr) +void tracing_snapshot_instance(struct trace_array *tr) { struct tracer *tracer = tr->current_trace; unsigned long flags; @@ -949,7 +949,7 @@ static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, struct trace_buffer *size_buf, int cpu_id); static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); -static int alloc_snapshot(struct trace_array *tr) +int tracing_alloc_snapshot_instance(struct trace_array *tr) { int ret; @@ -995,7 +995,7 @@ int tracing_alloc_snapshot(void) struct trace_array *tr = &global_trace; int ret; - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); WARN_ON(ret < 0); return ret; @@ -5408,7 +5408,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; } @@ -6451,7 +6451,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, } #endif if (!tr->allocated_snapshot) { - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; } @@ -7179,7 +7179,7 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, return ret; out_reg: - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6fb46a06c9dc..507954b4e058 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1817,6 +1817,17 @@ static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif +#ifdef CONFIG_TRACER_SNAPSHOT +void tracing_snapshot_instance(struct trace_array *tr); +int tracing_alloc_snapshot_instance(struct trace_array *tr); +#else +static inline void tracing_snapshot_instance(struct trace_array *tr) { } +static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) +{ + return 0; +} +#endif + extern struct trace_iterator *tracepoint_print_iter; #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d251cabcf69a..8b5bdcf64871 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -483,9 +483,10 @@ clear_event_triggers(struct trace_array *tr) struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { - struct event_trigger_data *data; - list_for_each_entry_rcu(data, &file->triggers, list) { + struct event_trigger_data *data, *n; + list_for_each_entry_safe(data, n, &file->triggers, list) { trace_event_trigger_enable_disable(file, 0); + list_del_rcu(&data->list); if (data->ops->free) data->ops->free(data->ops, data); } @@ -642,6 +643,7 @@ event_trigger_callback(struct event_command *cmd_ops, trigger_data->count = -1; trigger_data->ops = trigger_ops; trigger_data->cmd_ops = cmd_ops; + trigger_data->private_data = file; INIT_LIST_HEAD(&trigger_data->list); INIT_LIST_HEAD(&trigger_data->named_list); @@ -1053,7 +1055,12 @@ static void snapshot_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - tracing_snapshot(); + struct trace_event_file *file = data->private_data; + + if (file) + tracing_snapshot_instance(file->tr); + else + tracing_snapshot(); } static void @@ -1076,7 +1083,7 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, { int ret = register_trigger(glob, ops, data, file); - if (ret > 0 && tracing_alloc_snapshot() != 0) { + if (ret > 0 && tracing_alloc_snapshot_instance(file->tr) != 0) { unregister_trigger(glob, ops, data, file); ret = 0; } diff --git a/lib/Kconfig b/lib/Kconfig index 5fe577673b98..7a913937888b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -429,15 +429,50 @@ config SGL_ALLOC bool default n +config NEED_SG_DMA_LENGTH + bool + +config NEED_DMA_MAP_STATE + bool + +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT || PHYS_ADDR_T_64BIT + +config IOMMU_HELPER + bool + +config ARCH_HAS_SYNC_DMA_FOR_DEVICE + bool + +config ARCH_HAS_SYNC_DMA_FOR_CPU + bool + select NEED_DMA_MAP_STATE + config DMA_DIRECT_OPS bool - depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) - default n + depends on HAS_DMA + +config DMA_NONCOHERENT_OPS + bool + depends on HAS_DMA + select DMA_DIRECT_OPS + +config DMA_NONCOHERENT_MMAP + bool + depends on DMA_NONCOHERENT_OPS + +config DMA_NONCOHERENT_CACHE_SYNC + bool + depends on DMA_NONCOHERENT_OPS config DMA_VIRT_OPS bool - depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) - default n + depends on HAS_DMA + +config SWIOTLB + bool + select DMA_DIRECT_OPS + select NEED_DMA_MAP_STATE config CHECK_SIGNATURE bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c40c7b734cd1..76555479ae36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1634,7 +1634,7 @@ config PROVIDE_OHCI1394_DMA_INIT config DMA_API_DEBUG bool "Enable debugging of DMA-API usage" - depends on HAVE_DMA_API_DEBUG + select NEED_DMA_MAP_STATE help Enable this option to debug the use of the DMA API by device drivers. With this option you will be able to detect common bugs in device @@ -1651,6 +1651,23 @@ config DMA_API_DEBUG If unsure, say N. +config DMA_API_DEBUG_SG + bool "Debug DMA scatter-gather usage" + default y + depends on DMA_API_DEBUG + help + Perform extra checking that callers of dma_map_sg() have respected the + appropriate segment length/boundary limits for the given device when + preparing DMA scatterlists. + + This is particularly likely to have been overlooked in cases where the + dma_map_sg() API is used for general bulk mapping of pages rather than + preparing literal scatter-gather descriptors, where there is a risk of + unexpected behaviour from DMA API implementations if the scatterlist + is technically out-of-spec. + + If unsure, say N. + menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" def_bool y diff --git a/lib/Makefile b/lib/Makefile index ce20696d5a92..9f18c8152281 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,6 +30,7 @@ lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o +lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o @@ -147,7 +148,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 7f5cdc1e6b29..c007d25bee09 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -41,6 +41,11 @@ #define HASH_FN_SHIFT 13 #define HASH_FN_MASK (HASH_SIZE - 1) +/* allow architectures to override this if absolutely required */ +#ifndef PREALLOC_DMA_DEBUG_ENTRIES +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +#endif + enum { dma_debug_single, dma_debug_page, @@ -127,7 +132,7 @@ static u32 min_free_entries; static u32 nr_total_entries; /* number of preallocated entries requested by kernel cmdline */ -static u32 req_entries; +static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; /* debugfs dentry's for the stuff above */ static struct dentry *dma_debug_dent __read_mostly; @@ -439,7 +444,6 @@ void debug_dma_dump_mappings(struct device *dev) spin_unlock_irqrestore(&bucket->lock, flags); } } -EXPORT_SYMBOL(debug_dma_dump_mappings); /* * For each mapping (initial cacheline in the case of @@ -748,7 +752,6 @@ int dma_debug_resize_entries(u32 num_entries) return ret; } -EXPORT_SYMBOL(dma_debug_resize_entries); /* * DMA-API debugging init code @@ -1004,10 +1007,7 @@ void dma_debug_add_bus(struct bus_type *bus) bus_register_notifier(bus, nb); } -/* - * Let the architectures decide how many entries should be preallocated. - */ -void dma_debug_init(u32 num_entries) +static int dma_debug_init(void) { int i; @@ -1015,7 +1015,7 @@ void dma_debug_init(u32 num_entries) * called to set dma_debug_initialized */ if (global_disable) - return; + return 0; for (i = 0; i < HASH_SIZE; ++i) { INIT_LIST_HEAD(&dma_entry_hash[i].list); @@ -1026,17 +1026,14 @@ void dma_debug_init(u32 num_entries) pr_err("DMA-API: error creating debugfs entries - disabling\n"); global_disable = true; - return; + return 0; } - if (req_entries) - num_entries = req_entries; - - if (prealloc_memory(num_entries) != 0) { + if (prealloc_memory(nr_prealloc_entries) != 0) { pr_err("DMA-API: debugging out of memory error - disabled\n"); global_disable = true; - return; + return 0; } nr_total_entries = num_free_entries; @@ -1044,7 +1041,9 @@ void dma_debug_init(u32 num_entries) dma_debug_initialized = true; pr_info("DMA-API: debugging enabled by kernel config\n"); + return 0; } +core_initcall(dma_debug_init); static __init int dma_debug_cmdline(char *str) { @@ -1061,16 +1060,10 @@ static __init int dma_debug_cmdline(char *str) static __init int dma_debug_entries_cmdline(char *str) { - int res; - if (!str) return -EINVAL; - - res = get_option(&str, &req_entries); - - if (!res) - req_entries = 0; - + if (!get_option(&str, &nr_prealloc_entries)) + nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; return 0; } @@ -1293,6 +1286,32 @@ out: put_hash_bucket(bucket, &flags); } +static void check_sg_segment(struct device *dev, struct scatterlist *sg) +{ +#ifdef CONFIG_DMA_API_DEBUG_SG + unsigned int max_seg = dma_get_max_seg_size(dev); + u64 start, end, boundary = dma_get_seg_boundary(dev); + + /* + * Either the driver forgot to set dma_parms appropriately, or + * whoever generated the list forgot to check them. + */ + if (sg->length > max_seg) + err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", + sg->length, max_seg); + /* + * In some cases this could potentially be the DMA API + * implementation's fault, but it would usually imply that + * the scatterlist was built inappropriately to begin with. + */ + start = sg_dma_address(sg); + end = start + sg_dma_len(sg) - 1; + if ((start ^ end) & ~boundary) + err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", + start, end, boundary); +#endif +} + void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, bool map_single) @@ -1423,6 +1442,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); } + check_sg_segment(dev, s); + add_dma_entry(entry); } } diff --git a/lib/dma-direct.c b/lib/dma-direct.c index bbfb229aa067..8be8106270c2 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -34,6 +34,13 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, const char *caller) { if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (!dev->dma_mask) { + dev_err(dev, + "%s: call on device without dma_mask\n", + caller); + return false; + } + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { dev_err(dev, "%s: overflow %pad+%zu of device mask %llx\n", @@ -84,6 +91,13 @@ again: __free_pages(page, page_order); page = NULL; + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + dev->coherent_dma_mask < DMA_BIT_MASK(64) && + !(gfp & (GFP_DMA32 | GFP_DMA))) { + gfp |= GFP_DMA32; + goto again; + } + if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) { @@ -121,7 +135,7 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, free_pages((unsigned long)cpu_addr, page_order); } -static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { @@ -132,8 +146,8 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, return dma_addr; } -static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) { int i; struct scatterlist *sg; @@ -165,10 +179,16 @@ int dma_direct_supported(struct device *dev, u64 mask) if (mask < DMA_BIT_MASK(32)) return 0; #endif + /* + * Various PCI/PCIe bridges have broken support for > 32bit DMA even + * if the device itself might support it. + */ + if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) + return 0; return 1; } -static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) +int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == DIRECT_MAPPING_ERROR; } @@ -180,6 +200,5 @@ const struct dma_map_ops dma_direct_ops = { .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_supported, .mapping_error = dma_direct_mapping_error, - .is_phys = 1, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c new file mode 100644 index 000000000000..79e9a757387f --- /dev/null +++ b/lib/dma-noncoherent.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without providing cache + * coherence. + */ +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> +#include <linux/scatterlist.h> + +static void dma_noncoherent_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_noncoherent_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); +} + +static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t addr; + + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(dev, page_to_phys(page) + offset, + size, dir); + return addr; +} + +static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); + if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); + return nents; +} + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +static void dma_noncoherent_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); +} + +static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); +} + +static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); +} +#endif + +const struct dma_map_ops dma_noncoherent_ops = { + .alloc = arch_dma_alloc, + .free = arch_dma_free, + .mmap = arch_dma_mmap, + .sync_single_for_device = dma_noncoherent_sync_single_for_device, + .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, + .map_page = dma_noncoherent_map_page, + .map_sg = dma_noncoherent_map_sg, +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU + .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, + .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, + .unmap_page = dma_noncoherent_unmap_page, + .unmap_sg = dma_noncoherent_unmap_sg, +#endif + .dma_supported = dma_direct_supported, + .mapping_error = dma_direct_mapping_error, + .cache_sync = arch_dma_cache_sync, +}; +EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 23633c0fda4a..92a9f243c0e2 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -3,19 +3,8 @@ * IOMMU helper functions for the free area management */ -#include <linux/export.h> #include <linux/bitmap.h> -#include <linux/bug.h> - -int iommu_is_span_boundary(unsigned int index, unsigned int nr, - unsigned long shift, - unsigned long boundary_size) -{ - BUG_ON(!is_power_of_2(boundary_size)); - - shift = (shift + index) & (boundary_size - 1); - return shift + nr > boundary_size; -} +#include <linux/iommu-helper.h> unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, @@ -38,4 +27,3 @@ again: } return -1; } -EXPORT_SYMBOL(iommu_area_alloc); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 970212670b6a..fdae394172fa 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1012,7 +1012,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); -static inline size_t __pipe_get_pages(struct iov_iter *i, +static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, int idx, @@ -1102,7 +1102,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - size_t n; + ssize_t n; int idx; int npages; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 43e0cbedc3a0..a9e41aed6de4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2034,10 +2034,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node = NULL; - void __rcu **slot; + void __rcu **slot = NULL; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); + if (!slot) + return NULL; if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, get_slot_offset(node, slot)))) return NULL; diff --git a/lib/sbitmap.c b/lib/sbitmap.c index e6a9c06ec70c..6fdc6267f4a8 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) } EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); -static unsigned int sbq_calc_wake_batch(unsigned int depth) +static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { unsigned int wake_batch; + unsigned int shallow_depth; /* * For each batch, we wake up one queue. We need to make sure that our - * batch size is small enough that the full depth of the bitmap is - * enough to wake up all of the queues. + * batch size is small enough that the full depth of the bitmap, + * potentially limited by a shallow depth, is enough to wake up all of + * the queues. + * + * Each full word of the bitmap has bits_per_word bits, and there might + * be a partial word. There are depth / bits_per_word full words and + * depth % bits_per_word bits left over. In bitwise arithmetic: + * + * bits_per_word = 1 << shift + * depth / bits_per_word = depth >> shift + * depth % bits_per_word = depth & ((1 << shift) - 1) + * + * Each word can be limited to sbq->min_shallow_depth bits. */ - wake_batch = SBQ_WAKE_BATCH; - if (wake_batch > depth / SBQ_WAIT_QUEUES) - wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); + shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); + depth = ((depth >> sbq->sb.shift) * shallow_depth + + min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); + wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, + SBQ_WAKE_BATCH); return wake_batch; } @@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; } - sbq->wake_batch = sbq_calc_wake_batch(depth); + sbq->min_shallow_depth = UINT_MAX; + sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); @@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { - unsigned int wake_batch = sbq_calc_wake_batch(depth); + unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { WRITE_ONCE(sbq->wake_batch, wake_batch); /* - * Pairs with the memory barrier in sbq_wake_up() to ensure that - * the batch size is updated before the wait counts. + * Pairs with the memory barrier in sbitmap_queue_wake_up() + * to ensure that the batch size is updated before the wait + * counts. */ smp_mb__before_atomic(); for (i = 0; i < SBQ_WAIT_QUEUES; i++) atomic_set(&sbq->ws[i].wait_cnt, 1); } +} + +void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) +{ + sbitmap_queue_update_wake_batch(sbq, depth); sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); @@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int hint, depth; int nr; + WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); + hint = this_cpu_read(*sbq->alloc_hint); depth = READ_ONCE(sbq->sb.depth); if (unlikely(hint >= depth)) { @@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, } EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); +void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, + unsigned int min_shallow_depth) +{ + sbq->min_shallow_depth = min_shallow_depth; + sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); + static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; @@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) return NULL; } -static void sbq_wake_up(struct sbitmap_queue *sbq) +static bool __sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; unsigned int wake_batch; int wait_cnt; - /* - * Pairs with the memory barrier in set_current_state() to ensure the - * proper ordering of clear_bit()/waitqueue_active() in the waker and - * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the - * waiter. See the comment on waitqueue_active(). This is __after_atomic - * because we just did clear_bit_unlock() in the caller. - */ - smp_mb__after_atomic(); - ws = sbq_wake_ptr(sbq); if (!ws) - return; + return false; wait_cnt = atomic_dec_return(&ws->wait_cnt); if (wait_cnt <= 0) { + int ret; + wake_batch = READ_ONCE(sbq->wake_batch); + /* * Pairs with the memory barrier in sbitmap_queue_resize() to * ensure that we see the batch size update before the wait * count is reset. */ smp_mb__before_atomic(); + /* - * If there are concurrent callers to sbq_wake_up(), the last - * one to decrement the wait count below zero will bump it back - * up. If there is a concurrent resize, the count reset will - * either cause the cmpxchg to fail or overwrite after the - * cmpxchg. + * For concurrent callers of this, the one that failed the + * atomic_cmpxhcg() race should call this function again + * to wakeup a new batch on a different 'ws'. */ - atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); - sbq_index_atomic_inc(&sbq->wake_index); - wake_up_nr(&ws->wait, wake_batch); + ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); + if (ret == wait_cnt) { + sbq_index_atomic_inc(&sbq->wake_index); + wake_up_nr(&ws->wait, wake_batch); + return false; + } + + return true; } + + return false; +} + +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) +{ + while (__sbq_wake_up(sbq)) + ; } +EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { sbitmap_clear_bit_unlock(&sbq->sb, nr); - sbq_wake_up(sbq); + /* + * Pairs with the memory barrier in set_current_state() to ensure the + * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker + * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the + * waiter. See the comment on waitqueue_active(). + */ + smp_mb__after_atomic(); + sbitmap_queue_wake_up(sbq); + if (likely(!sbq->round_robin && nr < sbq->sb.depth)) *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; } @@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) /* * Pairs with the memory barrier in set_current_state() like in - * sbq_wake_up(). + * sbitmap_queue_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); @@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_puts(m, "}\n"); seq_printf(m, "round_robin=%d\n", sbq->round_robin); + seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_show); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cc640588f145..04b68d9dffac 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -593,9 +593,8 @@ found: } /* - * Allocates bounce buffer and returns its kernel virtual address. + * Allocates bounce buffer and returns its physical address. */ - static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -614,7 +613,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, } /* - * dma_addr is the kernel virtual address of the bounce buffer to unmap. + * tlb_addr is the physical address of the bounce buffer to unmap. */ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, @@ -692,7 +691,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } } -#ifdef CONFIG_DMA_DIRECT_OPS static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, size_t size) { @@ -727,7 +725,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, out_unmap: dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)(dev ? dev->coherent_dma_mask : 0), + (unsigned long long)dev->coherent_dma_mask, (unsigned long long)*dma_handle); /* @@ -764,7 +762,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, DMA_ATTR_SKIP_CPU_SYNC); return true; } -#endif static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, @@ -1045,7 +1042,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } -#ifdef CONFIG_DMA_DIRECT_OPS void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -1089,4 +1085,3 @@ const struct dma_map_ops swiotlb_dma_ops = { .unmap_page = swiotlb_unmap_page, .dma_supported = dma_direct_supported, }; -#endif /* CONFIG_DMA_DIRECT_OPS */ diff --git a/mm/Kconfig b/mm/Kconfig index e14c01513bfd..9673e7fbb4f0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -266,7 +266,7 @@ config ARCH_ENABLE_THP_MIGRATION bool config PHYS_ADDR_T_64BIT - def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT + def_bool 64BIT config BOUNCE bool "Enable bounce buffers" diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7441bd93b732..8fe3ebd6ac00 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -412,6 +412,7 @@ static void wb_exit(struct bdi_writeback *wb) * protected. */ static DEFINE_SPINLOCK(cgwb_lock); +static struct workqueue_struct *cgwb_release_wq; /** * wb_congested_get_create - get or create a wb_congested @@ -522,7 +523,7 @@ static void cgwb_release(struct percpu_ref *refcnt) { struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback, refcnt); - schedule_work(&wb->release_work); + queue_work(cgwb_release_wq, &wb->release_work); } static void cgwb_kill(struct bdi_writeback *wb) @@ -784,6 +785,21 @@ static void cgwb_bdi_register(struct backing_dev_info *bdi) spin_unlock_irq(&cgwb_lock); } +static int __init cgwb_init(void) +{ + /* + * There can be many concurrent release work items overwhelming + * system_wq. Put them in a separate wq and limit concurrency. + * There's no point in executing many of these in parallel. + */ + cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1); + if (!cgwb_release_wq) + return -ENOMEM; + + return 0; +} +subsys_initcall(cgwb_init); + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) @@ -39,7 +39,6 @@ #include <trace/events/cma.h> #include "cma.h" -#include "internal.h" struct cma cma_areas[MAX_CMA_AREAS]; unsigned cma_area_count; @@ -110,25 +109,23 @@ static int __init cma_activate_area(struct cma *cma) if (!cma->bitmap) return -ENOMEM; + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + do { unsigned j; base_pfn = pfn; - if (!pfn_valid(base_pfn)) - goto err; - - zone = page_zone(pfn_to_page(base_pfn)); for (j = pageblock_nr_pages; j; --j, pfn++) { - if (!pfn_valid(pfn)) - goto err; - + WARN_ON_ONCE(!pfn_valid(pfn)); /* - * In init_cma_reserved_pageblock(), present_pages - * is adjusted with assumption that all pages in - * the pageblock come from a single zone. + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. */ if (page_zone(pfn_to_page(pfn)) != zone) - goto err; + goto not_in_zone; } init_cma_reserved_pageblock(pfn_to_page(base_pfn)); } while (--i); @@ -142,7 +139,7 @@ static int __init cma_activate_area(struct cma *cma) return 0; -err: +not_in_zone: pr_err("CMA area %s could not be activated\n", cma->name); kfree(cma->bitmap); cma->count = 0; @@ -152,41 +149,6 @@ err: static int __init cma_init_reserved_areas(void) { int i; - struct zone *zone; - pg_data_t *pgdat; - - if (!cma_area_count) - return 0; - - for_each_online_pgdat(pgdat) { - unsigned long start_pfn = UINT_MAX, end_pfn = 0; - - zone = &pgdat->node_zones[ZONE_MOVABLE]; - - /* - * In this case, we cannot adjust the zone range - * since it is now maximum node span and we don't - * know original zone range. - */ - if (populated_zone(zone)) - continue; - - for (i = 0; i < cma_area_count; i++) { - if (pfn_to_nid(cma_areas[i].base_pfn) != - pgdat->node_id) - continue; - - start_pfn = min(start_pfn, cma_areas[i].base_pfn); - end_pfn = max(end_pfn, cma_areas[i].base_pfn + - cma_areas[i].count); - } - - if (!end_pfn) - continue; - - zone->zone_start_pfn = start_pfn; - zone->spanned_pages = end_pfn - start_pfn; - } for (i = 0; i < cma_area_count; i++) { int ret = cma_activate_area(&cma_areas[i]); @@ -195,32 +157,9 @@ static int __init cma_init_reserved_areas(void) return ret; } - /* - * Reserved pages for ZONE_MOVABLE are now activated and - * this would change ZONE_MOVABLE's managed page counter and - * the other zones' present counter. We need to re-calculate - * various zone information that depends on this initialization. - */ - build_all_zonelists(NULL); - for_each_populated_zone(zone) { - if (zone_idx(zone) == ZONE_MOVABLE) { - zone_pcp_reset(zone); - setup_zone_pageset(zone); - } else - zone_pcp_update(zone); - - set_zone_contiguous(zone); - } - - /* - * We need to re-init per zone wmark by calling - * init_per_zone_wmark_min() but doesn't call here because it is - * registered on core_initcall and it will be called later than us. - */ - return 0; } -pure_initcall(cma_init_reserved_areas); +core_initcall(cma_init_reserved_areas); /** * cma_init_reserved_mem() - create custom contiguous area from reserved memory diff --git a/mm/compaction.c b/mm/compaction.c index 028b7210a669..29bd1df18b98 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1450,12 +1450,14 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order, * if compaction succeeds. * For costly orders, we require low watermark instead of min for * compaction to proceed to increase its chances. + * ALLOC_CMA is used, as pages in CMA pageblocks are considered + * suitable migration targets */ watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? low_wmark_pages(zone) : min_wmark_pages(zone); watermark += compact_gap(order); if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, - 0, wmark_target)) + ALLOC_CMA, wmark_target)) return COMPACT_SKIPPED; return COMPACT_CONTINUE; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a3a1815f8e11..b9f3dbd885bd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2431,7 +2431,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ if (head[i].index >= end) { - __ClearPageDirty(head + i); + ClearPageDirty(head + i); __delete_from_page_cache(head + i, NULL); if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) shmem_uncharge(head->mapping->host, 1); diff --git a/mm/internal.h b/mm/internal.h index 62d8c34e63d5..502d14189794 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -168,9 +168,6 @@ extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern int user_min_free_kbytes; -extern void set_zone_contiguous(struct zone *zone); -extern void clear_zone_contiguous(struct zone *zone); - #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* @@ -498,6 +495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HARDER 0x10 /* try to alloc harder */ #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ +#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ enum ttu_flags; struct tlbflush_unmap_batch; diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index bc0e68f7dc75..f185455b3406 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -792,6 +792,40 @@ DEFINE_ASAN_SET_SHADOW(f5); DEFINE_ASAN_SET_SHADOW(f8); #ifdef CONFIG_MEMORY_HOTPLUG +static bool shadow_mapped(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + return false; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return false; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return false; + + /* + * We can't use pud_large() or pud_huge(), the first one is + * arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse + * pud_bad(), if pud is bad then it's bad because it's huge. + */ + if (pud_bad(*pud)) + return true; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + + if (pmd_bad(*pmd)) + return true; + pte = pte_offset_kernel(pmd, addr); + return !pte_none(*pte); +} + static int __meminit kasan_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -813,6 +847,14 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, case MEM_GOING_ONLINE: { void *ret; + /* + * If shadow is mapped already than it must have been mapped + * during the boot. This could happen if we onlining previously + * offlined memory. + */ + if (shadow_mapped(shadow_start)) + return NOTIFY_OK; + ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start, shadow_end, GFP_KERNEL, PAGE_KERNEL, VM_NO_GUARD, @@ -824,8 +866,26 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, kmemleak_ignore(ret); return NOTIFY_OK; } - case MEM_OFFLINE: - vfree((void *)shadow_start); + case MEM_CANCEL_ONLINE: + case MEM_OFFLINE: { + struct vm_struct *vm; + + /* + * shadow_start was either mapped during boot by kasan_init() + * or during memory online by __vmalloc_node_range(). + * In the latter case we can use vfree() to free shadow. + * Non-NULL result of the find_vm_area() will tell us if + * that was the second case. + * + * Currently it's not possible to free shadow mapped + * during boot by kasan_init(). It's because the code + * to do that hasn't been written yet. So we'll just + * leak the memory. + */ + vm = find_vm_area((void *)shadow_start); + if (vm) + vfree((void *)shadow_start); + } } return NOTIFY_OK; @@ -838,5 +898,5 @@ static int __init kasan_memhotplug_init(void) return 0; } -module_init(kasan_memhotplug_init); +core_initcall(kasan_memhotplug_init); #endif diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f74826cdceea..25982467800b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1158,7 +1158,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) * nodes have to go through register_node. * TODO clean up this mess. */ - ret = link_mem_sections(nid, start_pfn, nr_pages); + ret = link_mem_sections(nid, start_pfn, nr_pages, false); register_fail: /* * If sysfs file of new node can't create, cpu on the node diff --git a/mm/mempool.c b/mm/mempool.c index 5c9dce34719b..b54f2c20e5e0 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -138,6 +138,28 @@ static void *remove_element(mempool_t *pool, gfp_t flags) } /** + * mempool_exit - exit a mempool initialized with mempool_init() + * @pool: pointer to the memory pool which was initialized with + * mempool_init(). + * + * Free all reserved elements in @pool and @pool itself. This function + * only sleeps if the free_fn() function sleeps. + * + * May be called on a zeroed but uninitialized mempool (i.e. allocated with + * kzalloc()). + */ +void mempool_exit(mempool_t *pool) +{ + while (pool->curr_nr) { + void *element = remove_element(pool, GFP_KERNEL); + pool->free(element, pool->pool_data); + } + kfree(pool->elements); + pool->elements = NULL; +} +EXPORT_SYMBOL(mempool_exit); + +/** * mempool_destroy - deallocate a memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). @@ -150,15 +172,65 @@ void mempool_destroy(mempool_t *pool) if (unlikely(!pool)) return; - while (pool->curr_nr) { - void *element = remove_element(pool, GFP_KERNEL); - pool->free(element, pool->pool_data); - } - kfree(pool->elements); + mempool_exit(pool); kfree(pool); } EXPORT_SYMBOL(mempool_destroy); +int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, + gfp_t gfp_mask, int node_id) +{ + spin_lock_init(&pool->lock); + pool->min_nr = min_nr; + pool->pool_data = pool_data; + pool->alloc = alloc_fn; + pool->free = free_fn; + init_waitqueue_head(&pool->wait); + + pool->elements = kmalloc_array_node(min_nr, sizeof(void *), + gfp_mask, node_id); + if (!pool->elements) + return -ENOMEM; + + /* + * First pre-allocate the guaranteed number of buffers. + */ + while (pool->curr_nr < pool->min_nr) { + void *element; + + element = pool->alloc(gfp_mask, pool->pool_data); + if (unlikely(!element)) { + mempool_exit(pool); + return -ENOMEM; + } + add_element(pool, element); + } + + return 0; +} +EXPORT_SYMBOL(mempool_init_node); + +/** + * mempool_init - initialize a memory pool + * @min_nr: the minimum number of elements guaranteed to be + * allocated for this pool. + * @alloc_fn: user-defined element-allocation function. + * @free_fn: user-defined element-freeing function. + * @pool_data: optional private data available to the user-defined functions. + * + * Like mempool_create(), but initializes the pool in (i.e. embedded in another + * structure). + */ +int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data) +{ + return mempool_init_node(pool, min_nr, alloc_fn, free_fn, + pool_data, GFP_KERNEL, NUMA_NO_NODE); + +} +EXPORT_SYMBOL(mempool_init); + /** * mempool_create - create a memory pool * @min_nr: the minimum number of elements guaranteed to be @@ -186,35 +258,17 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, gfp_t gfp_mask, int node_id) { mempool_t *pool; + pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); if (!pool) return NULL; - pool->elements = kmalloc_array_node(min_nr, sizeof(void *), - gfp_mask, node_id); - if (!pool->elements) { + + if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, + gfp_mask, node_id)) { kfree(pool); return NULL; } - spin_lock_init(&pool->lock); - pool->min_nr = min_nr; - pool->pool_data = pool_data; - init_waitqueue_head(&pool->wait); - pool->alloc = alloc_fn; - pool->free = free_fn; - /* - * First pre-allocate the guaranteed number of buffers. - */ - while (pool->curr_nr < pool->min_nr) { - void *element; - - element = pool->alloc(gfp_mask, pool->pool_data); - if (unlikely(!element)) { - mempool_destroy(pool); - return NULL; - } - add_element(pool, element); - } return pool; } EXPORT_SYMBOL(mempool_create_node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 905db9d7962f..22320ea27489 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1743,38 +1743,16 @@ void __init page_alloc_init_late(void) } #ifdef CONFIG_CMA -static void __init adjust_present_page_count(struct page *page, long count) -{ - struct zone *zone = page_zone(page); - - /* We don't need to hold a lock since it is boot-up process */ - zone->present_pages += count; -} - /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void __init init_cma_reserved_pageblock(struct page *page) { unsigned i = pageblock_nr_pages; - unsigned long pfn = page_to_pfn(page); struct page *p = page; - int nid = page_to_nid(page); - - /* - * ZONE_MOVABLE will steal present pages from other zones by - * changing page links so page_zone() is changed. Before that, - * we need to adjust previous zone's page count first. - */ - adjust_present_page_count(page, -pageblock_nr_pages); do { __ClearPageReserved(p); set_page_count(p, 0); - - /* Steal pages from other zones */ - set_page_links(p, ZONE_MOVABLE, nid, pfn); - } while (++p, ++pfn, --i); - - adjust_present_page_count(page, pageblock_nr_pages); + } while (++p, --i); set_pageblock_migratetype(page, MIGRATE_CMA); @@ -2889,7 +2867,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * exists. */ watermark = min_wmark_pages(zone) + (1UL << order); - if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) + if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; __mod_zone_freepage_state(zone, -(1UL << order), mt); @@ -3165,6 +3143,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } +#ifdef CONFIG_CMA + /* If allocation can't use CMA areas don't use free CMA pages */ + if (!(alloc_flags & ALLOC_CMA)) + free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES); +#endif + /* * Check watermarks for an order-0 allocation request. If these * are not met, then a high-order request also cannot go ahead @@ -3191,8 +3175,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } #ifdef CONFIG_CMA - if (!list_empty(&area->free_list[MIGRATE_CMA])) + if ((alloc_flags & ALLOC_CMA) && + !list_empty(&area->free_list[MIGRATE_CMA])) { return true; + } #endif if (alloc_harder && !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) @@ -3212,6 +3198,13 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags) { long free_pages = zone_page_state(z, NR_FREE_PAGES); + long cma_pages = 0; + +#ifdef CONFIG_CMA + /* If allocation can't use CMA areas don't use free CMA pages */ + if (!(alloc_flags & ALLOC_CMA)) + cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES); +#endif /* * Fast check for order-0 only. If this fails then the reserves @@ -3220,7 +3213,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, * the caller is !atomic then it'll uselessly search the free * list. That corner case is then slower but it is harmless. */ - if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx]) + if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx]) return true; return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, @@ -3856,6 +3849,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask) } else if (unlikely(rt_task(current)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; +#ifdef CONFIG_CMA + if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) + alloc_flags |= ALLOC_CMA; +#endif return alloc_flags; } @@ -4322,6 +4319,9 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return false; + if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE) + *alloc_flags |= ALLOC_CMA; + return true; } @@ -6204,7 +6204,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) { enum zone_type j; int nid = pgdat->node_id; - unsigned long node_end_pfn = 0; pgdat_resize_init(pgdat); #ifdef CONFIG_NUMA_BALANCING @@ -6232,13 +6231,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, freesize, memmap_pages; unsigned long zone_start_pfn = zone->zone_start_pfn; - unsigned long movable_size = 0; size = zone->spanned_pages; realsize = freesize = zone->present_pages; - if (zone_end_pfn(zone) > node_end_pfn) - node_end_pfn = zone_end_pfn(zone); - /* * Adjust freesize so that it accounts for how much memory @@ -6287,30 +6282,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) zone_seqlock_init(zone); zone_pcp_init(zone); - /* - * The size of the CMA area is unknown now so we need to - * prepare the memory for the usemap at maximum. - */ - if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE && - pgdat->node_spanned_pages) { - movable_size = node_end_pfn - pgdat->node_start_pfn; - } - - if (!size && !movable_size) + if (!size) continue; set_pageblock_order(); - if (movable_size) { - zone->zone_start_pfn = pgdat->node_start_pfn; - zone->spanned_pages = movable_size; - setup_usemap(pgdat, zone, - pgdat->node_start_pfn, movable_size); - init_currently_empty_zone(zone, - pgdat->node_start_pfn, movable_size); - } else { - setup_usemap(pgdat, zone, zone_start_pfn, size); - init_currently_empty_zone(zone, zone_start_pfn, size); - } + setup_usemap(pgdat, zone, zone_start_pfn, size); + init_currently_empty_zone(zone, zone_start_pfn, size); memmap_init(size, nid, j, zone_start_pfn); } } @@ -7621,11 +7598,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unsigned long pfn, iter, found; /* - * For avoiding noise data, lru_add_drain_all() should be called - * If ZONE_MOVABLE, the zone never contains unmovable pages + * TODO we could make this much more efficient by not checking every + * page in the range if we know all of them are in MOVABLE_ZONE and + * that the movable zone guarantees that pages are migratable but + * the later is not the case right now unfortunatelly. E.g. movablecore + * can still lead to having bootmem allocations in zone_movable. */ - if (zone_idx(zone) == ZONE_MOVABLE) - return false; /* * CMA allocations (alloc_contig_range) really need to mark isolate @@ -7646,7 +7624,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, page = pfn_to_page(check); if (PageReserved(page)) - return true; + goto unmovable; /* * Hugepages are not in LRU lists, but they're movable. @@ -7696,9 +7674,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * page at boot. */ if (found > count) - return true; + goto unmovable; } return false; +unmovable: + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); + return true; } bool is_pageblock_removable_nolock(struct page *page) @@ -7951,7 +7932,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages) } #endif -#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA +#ifdef CONFIG_MEMORY_HOTPLUG /* * The zone indicated has a new number of managed_pages; batch sizes and percpu * page high values need to be recalulated. diff --git a/mm/swapfile.c b/mm/swapfile.c index cc2cf04d9018..78a015fcec3b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3112,6 +3112,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) unsigned long *frontswap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; + bool inced_nr_rotate_swap = false; if (swap_flags & ~SWAP_FLAGS_VALID) return -EINVAL; @@ -3215,8 +3216,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) cluster = per_cpu_ptr(p->percpu_cluster, cpu); cluster_set_null(&cluster->index); } - } else + } else { atomic_inc(&nr_rotate_swap); + inced_nr_rotate_swap = true; + } error = swap_cgroup_swapon(p->type, maxpages); if (error) @@ -3307,6 +3310,8 @@ bad_swap: vfree(swap_map); kvfree(cluster_info); kvfree(frontswap_map); + if (inced_nr_rotate_swap) + atomic_dec(&nr_rotate_swap); if (swap_file) { if (inode && S_ISREG(inode->i_mode)) { inode_unlock(inode); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ebff729cc956..63a5f502da08 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2751,25 +2751,14 @@ static const struct seq_operations vmalloc_op = { .show = s_show, }; -static int vmalloc_open(struct inode *inode, struct file *file) +static int __init proc_vmalloc_init(void) { if (IS_ENABLED(CONFIG_NUMA)) - return seq_open_private(file, &vmalloc_op, - nr_node_ids * sizeof(unsigned int)); + proc_create_seq_private("vmallocinfo", S_IRUSR, NULL, + &vmalloc_op, + nr_node_ids * sizeof(unsigned int), NULL); else - return seq_open(file, &vmalloc_op); -} - -static const struct file_operations proc_vmalloc_operations = { - .open = vmalloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -static int __init proc_vmalloc_init(void) -{ - proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); + proc_create_seq("vmallocinfo", S_IRUSR, NULL, &vmalloc_op); return 0; } module_init(proc_vmalloc_init); diff --git a/mm/vmscan.c b/mm/vmscan.c index 9b697323a88c..9270a4370d54 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1418,7 +1418,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) return ret; mapping = page_mapping(page); - migrate_dirty = mapping && mapping->a_ops->migratepage; + migrate_dirty = !mapping || mapping->a_ops->migratepage; unlock_page(page); if (!migrate_dirty) return ret; diff --git a/mm/vmstat.c b/mm/vmstat.c index a2b9518980ce..75eda9c2b260 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1516,18 +1516,6 @@ static const struct seq_operations fragmentation_op = { .show = frag_show, }; -static int fragmentation_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &fragmentation_op); -} - -static const struct file_operations buddyinfo_file_operations = { - .open = fragmentation_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static const struct seq_operations pagetypeinfo_op = { .start = frag_start, .next = frag_next, @@ -1535,18 +1523,6 @@ static const struct seq_operations pagetypeinfo_op = { .show = pagetypeinfo_show, }; -static int pagetypeinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pagetypeinfo_op); -} - -static const struct file_operations pagetypeinfo_file_operations = { - .open = pagetypeinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone) { int zid; @@ -1663,18 +1639,6 @@ static const struct seq_operations zoneinfo_op = { .show = zoneinfo_show, }; -static int zoneinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &zoneinfo_op); -} - -static const struct file_operations zoneinfo_file_operations = { - .open = zoneinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - enum writeback_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, @@ -1762,18 +1726,6 @@ static const struct seq_operations vmstat_op = { .stop = vmstat_stop, .show = vmstat_show, }; - -static int vmstat_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &vmstat_op); -} - -static const struct file_operations vmstat_file_operations = { - .open = vmstat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SMP @@ -2020,10 +1972,10 @@ void __init init_mm_internals(void) start_shepherd_timer(); #endif #ifdef CONFIG_PROC_FS - proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations); - proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations); - proc_create("vmstat", 0444, NULL, &vmstat_file_operations); - proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations); + proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); + proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op); + proc_create_seq("vmstat", 0444, NULL, &vmstat_op); + proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); #endif } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index a627a5db2125..d36e8c4b7f56 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -73,35 +73,6 @@ static const struct seq_operations vlan_seq_ops = { .show = vlan_seq_show, }; -static int vlan_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &vlan_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations vlan_fops = { - .open = vlan_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* - * /proc/net/vlan/<device> file and inode operations - */ - -static int vlandev_seq_open(struct inode *inode, struct file *file) -{ - return single_open(file, vlandev_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations vlandev_fops = { - .open = vlandev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * Proc filesystem directory entries. */ @@ -148,8 +119,9 @@ int __net_init vlan_proc_init(struct net *net) if (!vn->proc_vlan_dir) goto err; - vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600, - vn->proc_vlan_dir, &vlan_fops); + vn->proc_vlan_conf = proc_create_net(name_conf, S_IFREG | 0600, + vn->proc_vlan_dir, &vlan_seq_ops, + sizeof(struct seq_net_private)); if (!vn->proc_vlan_conf) goto err; return 0; @@ -171,9 +143,8 @@ int vlan_proc_add_dev(struct net_device *vlandev) if (!strcmp(vlandev->name, name_conf)) return -EINVAL; - vlan->dent = - proc_create_data(vlandev->name, S_IFREG | 0600, - vn->proc_vlan_dir, &vlandev_fops, vlandev); + vlan->dent = proc_create_single_data(vlandev->name, S_IFREG | 0600, + vn->proc_vlan_dir, vlandev_seq_show, vlandev); if (!vlan->dent) return -ENOBUFS; return 0; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index d4c1021e74e1..49a16cee2aae 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -907,11 +907,6 @@ void aarp_device_down(struct net_device *dev) } #ifdef CONFIG_PROC_FS -struct aarp_iter_state { - int bucket; - struct aarp_entry **table; -}; - /* * Get the aarp entry that is in the chain described * by the iterator. @@ -1033,25 +1028,12 @@ static int aarp_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations aarp_seq_ops = { +const struct seq_operations aarp_seq_ops = { .start = aarp_seq_start, .next = aarp_seq_next, .stop = aarp_seq_stop, .show = aarp_seq_show, }; - -static int aarp_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &aarp_seq_ops, - sizeof(struct aarp_iter_state)); -} - -const struct file_operations atalk_seq_arp_fops = { - .open = aarp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; #endif /* General module cleanup. Called from cleanup_module() in ddp.c. */ diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 7214aea14cb3..8006295f8bd7 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -210,42 +210,6 @@ static const struct seq_operations atalk_seq_socket_ops = { .show = atalk_seq_socket_show, }; -static int atalk_seq_interface_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &atalk_seq_interface_ops); -} - -static int atalk_seq_route_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &atalk_seq_route_ops); -} - -static int atalk_seq_socket_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &atalk_seq_socket_ops); -} - -static const struct file_operations atalk_seq_interface_fops = { - .open = atalk_seq_interface_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations atalk_seq_route_fops = { - .open = atalk_seq_route_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations atalk_seq_socket_fops = { - .open = atalk_seq_socket_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static struct proc_dir_entry *atalk_proc_dir; int __init atalk_proc_init(void) @@ -257,22 +221,23 @@ int __init atalk_proc_init(void) if (!atalk_proc_dir) goto out; - p = proc_create("interface", 0444, atalk_proc_dir, - &atalk_seq_interface_fops); + p = proc_create_seq("interface", 0444, atalk_proc_dir, + &atalk_seq_interface_ops); if (!p) goto out_interface; - p = proc_create("route", 0444, atalk_proc_dir, - &atalk_seq_route_fops); + p = proc_create_seq("route", 0444, atalk_proc_dir, + &atalk_seq_route_ops); if (!p) goto out_route; - p = proc_create("socket", 0444, atalk_proc_dir, - &atalk_seq_socket_fops); + p = proc_create_seq("socket", 0444, atalk_proc_dir, + &atalk_seq_socket_ops); if (!p) goto out_socket; - p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops); + p = proc_create_seq_private("arp", 0444, atalk_proc_dir, &aarp_seq_ops, + sizeof(struct aarp_iter_state), NULL); if (!p) goto out_arp; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index fd94bea36ee8..36b3adacc0dd 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -818,18 +818,6 @@ static const struct seq_operations br2684_seq_ops = { .show = br2684_seq_show, }; -static int br2684_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &br2684_seq_ops); -} - -static const struct file_operations br2684_proc_ops = { - .open = br2684_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - extern struct proc_dir_entry *atm_proc_root; /* from proc.c */ #endif /* CONFIG_PROC_FS */ @@ -837,7 +825,7 @@ static int __init br2684_init(void) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *p; - p = proc_create("br2684", 0, atm_proc_root, &br2684_proc_ops); + p = proc_create_seq("br2684", 0, atm_proc_root, &br2684_seq_ops); if (p == NULL) return -ENOMEM; #endif diff --git a/net/atm/clip.c b/net/atm/clip.c index f07dbc632222..66caa48a27c2 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -863,20 +863,6 @@ static const struct seq_operations arp_seq_ops = { .stop = neigh_seq_stop, .show = clip_seq_show, }; - -static int arp_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &arp_seq_ops, - sizeof(struct clip_seq_state)); -} - -static const struct file_operations arp_seq_fops = { - .open = arp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, - .owner = THIS_MODULE -}; #endif static void atm_clip_exit_noproc(void); @@ -893,7 +879,8 @@ static int __init atm_clip_init(void) { struct proc_dir_entry *p; - p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops); + p = proc_create_net("arp", 0444, atm_proc_root, &arp_seq_ops, + sizeof(struct clip_seq_state)); if (!p) { pr_err("Unable to initialize /proc/net/atm/arp\n"); atm_clip_exit_noproc(); diff --git a/net/atm/lec.c b/net/atm/lec.c index 3138a869b5c0..5a95fcf6f9b6 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -990,18 +990,6 @@ static const struct seq_operations lec_seq_ops = { .stop = lec_seq_stop, .show = lec_seq_show, }; - -static int lec_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &lec_seq_ops, sizeof(struct lec_state)); -} - -static const struct file_operations lec_seq_fops = { - .open = lec_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; #endif static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) @@ -1047,7 +1035,8 @@ static int __init lane_module_init(void) #ifdef CONFIG_PROC_FS struct proc_dir_entry *p; - p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops); + p = proc_create_seq_private("lec", 0444, atm_proc_root, &lec_seq_ops, + sizeof(struct lec_state), NULL); if (!p) { pr_err("Unable to initialize /proc/net/atm/lec\n"); return -ENOMEM; diff --git a/net/atm/proc.c b/net/atm/proc.c index 55410c00c7e2..0b0495a41bbe 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -68,7 +68,6 @@ static void atm_dev_info(struct seq_file *seq, const struct atm_dev *dev) struct vcc_state { int bucket; struct sock *sk; - int family; }; static inline int compare_family(struct sock *sk, int family) @@ -106,23 +105,13 @@ out: return (l < 0); } -static inline void *vcc_walk(struct vcc_state *state, loff_t l) +static inline void *vcc_walk(struct seq_file *seq, loff_t l) { - return __vcc_walk(&state->sk, state->family, &state->bucket, l) ? - state : NULL; -} - -static int __vcc_seq_open(struct inode *inode, struct file *file, - int family, const struct seq_operations *ops) -{ - struct vcc_state *state; - - state = __seq_open_private(file, ops, sizeof(*state)); - if (state == NULL) - return -ENOMEM; + struct vcc_state *state = seq->private; + int family = (uintptr_t)(PDE_DATA(file_inode(seq->file))); - state->family = family; - return 0; + return __vcc_walk(&state->sk, family, &state->bucket, l) ? + state : NULL; } static void *vcc_seq_start(struct seq_file *seq, loff_t *pos) @@ -133,7 +122,7 @@ static void *vcc_seq_start(struct seq_file *seq, loff_t *pos) read_lock(&vcc_sklist_lock); state->sk = SEQ_START_TOKEN; - return left ? vcc_walk(state, left) : SEQ_START_TOKEN; + return left ? vcc_walk(seq, left) : SEQ_START_TOKEN; } static void vcc_seq_stop(struct seq_file *seq, void *v) @@ -144,9 +133,7 @@ static void vcc_seq_stop(struct seq_file *seq, void *v) static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct vcc_state *state = seq->private; - - v = vcc_walk(state, 1); + v = vcc_walk(seq, 1); *pos += !!PTR_ERR(v); return v; } @@ -257,18 +244,6 @@ static const struct seq_operations atm_dev_seq_ops = { .show = atm_dev_seq_show, }; -static int atm_dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &atm_dev_seq_ops); -} - -static const struct file_operations devices_seq_fops = { - .open = atm_dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int pvc_seq_show(struct seq_file *seq, void *v) { static char atm_pvc_banner[] = @@ -292,18 +267,6 @@ static const struct seq_operations pvc_seq_ops = { .show = pvc_seq_show, }; -static int pvc_seq_open(struct inode *inode, struct file *file) -{ - return __vcc_seq_open(inode, file, PF_ATMPVC, &pvc_seq_ops); -} - -static const struct file_operations pvc_seq_fops = { - .open = pvc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static int vcc_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { @@ -326,18 +289,6 @@ static const struct seq_operations vcc_seq_ops = { .show = vcc_seq_show, }; -static int vcc_seq_open(struct inode *inode, struct file *file) -{ - return __vcc_seq_open(inode, file, 0, &vcc_seq_ops); -} - -static const struct file_operations vcc_seq_fops = { - .open = vcc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static int svc_seq_show(struct seq_file *seq, void *v) { static const char atm_svc_banner[] = @@ -361,18 +312,6 @@ static const struct seq_operations svc_seq_ops = { .show = svc_seq_show, }; -static int svc_seq_open(struct inode *inode, struct file *file) -{ - return __vcc_seq_open(inode, file, PF_ATMSVC, &svc_seq_ops); -} - -static const struct file_operations svc_seq_fops = { - .open = svc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -440,58 +379,22 @@ void atm_proc_dev_deregister(struct atm_dev *dev) kfree(dev->proc_name); } -static struct atm_proc_entry { - char *name; - const struct file_operations *proc_fops; - struct proc_dir_entry *dirent; -} atm_proc_ents[] = { - { .name = "devices", .proc_fops = &devices_seq_fops }, - { .name = "pvc", .proc_fops = &pvc_seq_fops }, - { .name = "svc", .proc_fops = &svc_seq_fops }, - { .name = "vc", .proc_fops = &vcc_seq_fops }, - { .name = NULL, .proc_fops = NULL } -}; - -static void atm_proc_dirs_remove(void) -{ - static struct atm_proc_entry *e; - - for (e = atm_proc_ents; e->name; e++) { - if (e->dirent) - remove_proc_entry(e->name, atm_proc_root); - } - remove_proc_entry("atm", init_net.proc_net); -} - int __init atm_proc_init(void) { - static struct atm_proc_entry *e; - int ret; - atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net); if (!atm_proc_root) - goto err_out; - for (e = atm_proc_ents; e->name; e++) { - struct proc_dir_entry *dirent; - - dirent = proc_create(e->name, 0444, - atm_proc_root, e->proc_fops); - if (!dirent) - goto err_out_remove; - e->dirent = dirent; - } - ret = 0; -out: - return ret; - -err_out_remove: - atm_proc_dirs_remove(); -err_out: - ret = -ENOMEM; - goto out; + return -ENOMEM; + proc_create_seq("devices", 0444, atm_proc_root, &atm_dev_seq_ops); + proc_create_seq_private("pvc", 0444, atm_proc_root, &pvc_seq_ops, + sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMPVC); + proc_create_seq_private("svc", 0444, atm_proc_root, &svc_seq_ops, + sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMSVC); + proc_create_seq_private("vc", 0444, atm_proc_root, &vcc_seq_ops, + sizeof(struct vcc_state), NULL); + return 0; } void atm_proc_exit(void) { - atm_proc_dirs_remove(); + remove_proc_subtree("atm", init_net.proc_net); } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2b41366fcad2..c603d33d5410 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1924,19 +1924,6 @@ static const struct seq_operations ax25_info_seqops = { .stop = ax25_info_stop, .show = ax25_info_show, }; - -static int ax25_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ax25_info_seqops); -} - -static const struct file_operations ax25_info_fops = { - .open = ax25_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif static const struct net_proto_family ax25_family_ops = { @@ -1989,10 +1976,10 @@ static int __init ax25_init(void) dev_add_pack(&ax25_packet_type); register_netdevice_notifier(&ax25_dev_notifier); - proc_create("ax25_route", 0444, init_net.proc_net, - &ax25_route_fops); - proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops); - proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops); + proc_create_seq("ax25_route", 0444, init_net.proc_net, &ax25_rt_seqops); + proc_create_seq("ax25", 0444, init_net.proc_net, &ax25_info_seqops); + proc_create_seq("ax25_calls", 0444, init_net.proc_net, + &ax25_uid_seqops); out: return rc; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 525558972fd9..a0eff323af12 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -323,25 +323,12 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations ax25_rt_seqops = { +const struct seq_operations ax25_rt_seqops = { .start = ax25_rt_seq_start, .next = ax25_rt_seq_next, .stop = ax25_rt_seq_stop, .show = ax25_rt_seq_show, }; - -static int ax25_rt_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ax25_rt_seqops); -} - -const struct file_operations ax25_route_fops = { - .open = ax25_rt_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif /* diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 4ebe91ba317a..99d02e390e43 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -181,25 +181,12 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations ax25_uid_seqops = { +const struct seq_operations ax25_uid_seqops = { .start = ax25_uid_seq_start, .next = ax25_uid_seq_next, .stop = ax25_uid_seq_stop, .show = ax25_uid_seq_show, }; - -static int ax25_uid_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ax25_uid_seqops); -} - -const struct file_operations ax25_uid_fops = { - .open = ax25_uid_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif /* diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index a11d3d89f012..a35f597e8c8b 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1536,7 +1536,7 @@ out: if (!ret && primary_if) *primary_if = hard_iface; - else + else if (hard_iface) batadv_hardif_put(hard_iface); return ret; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0225616d5771..3986551397ca 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -862,7 +862,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; - rcu_read_lock(); + spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -900,7 +900,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } @@ -931,15 +931,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; - u16 num_entries = 0; + u16 vlan_entries = 0; + u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; - rcu_read_lock(); + spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; } change_offset = sizeof(**tt_data); @@ -947,7 +952,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; @@ -964,6 +969,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -974,7 +983,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } @@ -1538,6 +1547,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, * handled by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list + * @flags: a pointer to store TT flags for the given @entry received + * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. * @@ -1545,7 +1556,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) + const struct batadv_orig_node *orig_node, + u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; @@ -1553,6 +1565,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; + + if (flags) + *flags = orig_entry->flags; + batadv_tt_orig_list_entry_put(orig_entry); } @@ -1731,7 +1747,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) + orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; @@ -2880,23 +2896,46 @@ unlock: } /** - * batadv_tt_local_valid() - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given local TT entry. If it is, then the provided + * flags pointer is updated. * * Return: true if the entry is a valid, false otherwise. */ -static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; + + if (flags) + *flags = tt_common_entry->flags; + return true; } +/** + * batadv_tt_global_valid() - verify global tt entry and get flags + * @entry_ptr: to be checked global tt entry + * @data_ptr: an orig_node object (may be NULL) + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given global TT entry. If it is, then the provided + * flags pointer is updated either with the common (summed) TT flags if data_ptr + * is NULL or the specific, per originator TT flags otherwise. + * + * Return: true if the entry is a valid, false otherwise. + */ static bool batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2910,7 +2949,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr, struct batadv_tt_global_entry, common); - return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); + return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, + flags); } /** @@ -2920,25 +2960,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr, * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data - * @valid_cb: function to filter tt change entries + * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. */ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, - const void *), + const void *, + u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; + u8 flags; + bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + if (!valid_cb) + return; + rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2948,11 +2997,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if (tt_tot == tt_num_entries) break; - if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) + ret = valid_cb(tt_common_entry, cb_data, &flags); + if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); - tt_change->flags = tt_common_entry->flags; + tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 84d92a077834..3264e1873219 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -605,15 +605,10 @@ int bt_sock_wait_ready(struct sock *sk, unsigned long flags) EXPORT_SYMBOL(bt_sock_wait_ready); #ifdef CONFIG_PROC_FS -struct bt_seq_state { - struct bt_sock_list *l; -}; - static void *bt_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq->private->l->lock) { - struct bt_seq_state *s = seq->private; - struct bt_sock_list *l = s->l; + struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); read_lock(&l->lock); return seq_hlist_start_head(&l->head, *pos); @@ -621,8 +616,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos) static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct bt_seq_state *s = seq->private; - struct bt_sock_list *l = s->l; + struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); return seq_hlist_next(v, &l->head, pos); } @@ -630,16 +624,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void bt_seq_stop(struct seq_file *seq, void *v) __releases(seq->private->l->lock) { - struct bt_seq_state *s = seq->private; - struct bt_sock_list *l = s->l; + struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); read_unlock(&l->lock); } static int bt_seq_show(struct seq_file *seq, void *v) { - struct bt_seq_state *s = seq->private; - struct bt_sock_list *l = s->l; + struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); if (v == SEQ_START_TOKEN) { seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Parent"); @@ -681,35 +673,13 @@ static const struct seq_operations bt_seq_ops = { .show = bt_seq_show, }; -static int bt_seq_open(struct inode *inode, struct file *file) -{ - struct bt_sock_list *sk_list; - struct bt_seq_state *s; - - sk_list = PDE_DATA(inode); - s = __seq_open_private(file, &bt_seq_ops, - sizeof(struct bt_seq_state)); - if (!s) - return -ENOMEM; - - s->l = sk_list; - return 0; -} - -static const struct file_operations bt_fops = { - .open = bt_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private -}; - int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (* seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; - if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list)) + if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list)) return -ENOMEM; return 0; } diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 426a92f02db4..eb41556002e3 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -521,18 +521,6 @@ static int cmtp_proc_show(struct seq_file *m, void *v) return 0; } -static int cmtp_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cmtp_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations cmtp_proc_fops = { - .open = cmtp_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - int cmtp_attach_device(struct cmtp_session *session) { unsigned char buf[4]; @@ -571,7 +559,7 @@ int cmtp_attach_device(struct cmtp_session *session) session->ctrl.send_message = cmtp_send_message; session->ctrl.procinfo = cmtp_procinfo; - session->ctrl.proc_fops = &cmtp_proc_fops; + session->ctrl.proc_show = cmtp_proc_show; if (attach_capi_ctr(&session->ctrl) < 0) { BT_ERR("Can't attach new controller"); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 47ba98db145d..46c1fe7637ea 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -161,8 +161,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) /* Make sure the match only receives stp frames */ if (!par->nft_compat && (!ether_addr_equal(e->destmac, eth_stp_addr) || - !is_broadcast_ether_addr(e->destmsk) || - !(e->bitmask & EBT_DESTMAC))) + !(e->bitmask & EBT_DESTMAC) || + !is_broadcast_ether_addr(e->destmsk))) return -EINVAL; return 0; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 28a4c3490359..6ba639f6c51d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1954,7 +1954,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, int off, pad = 0; unsigned int size_kern, match_size = mwt->match_size; - strlcpy(name, mwt->u.name, sizeof(name)); + if (strscpy(name, mwt->u.name, sizeof(name)) < 0) + return -EINVAL; if (state->buf_kern_start) dst = state->buf_kern_start + state->buf_kern_offset; diff --git a/net/can/bcm.c b/net/can/bcm.c index ac5e5e34fee3..6ad89f49b341 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -239,18 +239,6 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_putc(m, '\n'); return 0; } - -static int bcm_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, bcm_proc_show); -} - -static const struct file_operations bcm_proc_fops = { - .open = bcm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ /* @@ -1606,9 +1594,9 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, if (net->can.bcmproc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); - bo->bcm_proc_read = proc_create_data(bo->procname, 0644, + bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644, net->can.bcmproc_dir, - &bcm_proc_fops, sk); + bcm_proc_show, sk); if (!bo->bcm_proc_read) { ret = -ENOMEM; goto fail; diff --git a/net/can/proc.c b/net/can/proc.c index fdf704e9bb8c..70fea17bb04c 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -270,18 +270,6 @@ static int can_stats_proc_show(struct seq_file *m, void *v) return 0; } -static int can_stats_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, can_stats_proc_show); -} - -static const struct file_operations can_stats_proc_fops = { - .open = can_stats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int can_reset_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; @@ -303,36 +291,12 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v) return 0; } -static int can_reset_stats_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, can_reset_stats_proc_show); -} - -static const struct file_operations can_reset_stats_proc_fops = { - .open = can_reset_stats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int can_version_proc_show(struct seq_file *m, void *v) { seq_printf(m, "%s\n", CAN_VERSION_STRING); return 0; } -static int can_version_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, can_version_proc_show); -} - -static const struct file_operations can_version_proc_fops = { - .open = can_version_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, struct net_device *dev, struct can_dev_rcv_lists *d) @@ -373,18 +337,6 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) return 0; } -static int can_rcvlist_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, can_rcvlist_proc_show); -} - -static const struct file_operations can_rcvlist_proc_fops = { - .open = can_rcvlist_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static inline void can_rcvlist_proc_show_array(struct seq_file *m, struct net_device *dev, struct hlist_head *rcv_array, @@ -440,19 +392,6 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) return 0; } -static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, can_rcvlist_sff_proc_show); -} - -static const struct file_operations can_rcvlist_sff_proc_fops = { - .open = can_rcvlist_sff_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; @@ -483,18 +422,6 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) return 0; } -static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, can_rcvlist_eff_proc_show); -} - -static const struct file_operations can_rcvlist_eff_proc_fops = { - .open = can_rcvlist_eff_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* * can_init_proc - create main CAN proc directory and procfs entries */ @@ -510,37 +437,29 @@ void can_init_proc(struct net *net) } /* own procfs entries from the AF_CAN core */ - net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644, - net->can.proc_dir, - &can_version_proc_fops); - net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644, - net->can.proc_dir, - &can_stats_proc_fops); - net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, - net->can.proc_dir, - &can_reset_stats_proc_fops); - net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, - net->can.proc_dir, - &can_rcvlist_proc_fops, - (void *)RX_ERR); - net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, - net->can.proc_dir, - &can_rcvlist_proc_fops, - (void *)RX_ALL); - net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, - net->can.proc_dir, - &can_rcvlist_proc_fops, - (void *)RX_FIL); - net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, - net->can.proc_dir, - &can_rcvlist_proc_fops, - (void *)RX_INV); - net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, - net->can.proc_dir, - &can_rcvlist_eff_proc_fops); - net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, - net->can.proc_dir, - &can_rcvlist_sff_proc_fops); + net->can.pde_version = proc_create_net_single(CAN_PROC_VERSION, 0644, + net->can.proc_dir, can_version_proc_show, NULL); + net->can.pde_stats = proc_create_net_single(CAN_PROC_STATS, 0644, + net->can.proc_dir, can_stats_proc_show, NULL); + net->can.pde_reset_stats = proc_create_net_single(CAN_PROC_RESET_STATS, + 0644, net->can.proc_dir, can_reset_stats_proc_show, + NULL); + net->can.pde_rcvlist_err = proc_create_net_single(CAN_PROC_RCVLIST_ERR, + 0644, net->can.proc_dir, can_rcvlist_proc_show, + (void *)RX_ERR); + net->can.pde_rcvlist_all = proc_create_net_single(CAN_PROC_RCVLIST_ALL, + 0644, net->can.proc_dir, can_rcvlist_proc_show, + (void *)RX_ALL); + net->can.pde_rcvlist_fil = proc_create_net_single(CAN_PROC_RCVLIST_FIL, + 0644, net->can.proc_dir, can_rcvlist_proc_show, + (void *)RX_FIL); + net->can.pde_rcvlist_inv = proc_create_net_single(CAN_PROC_RCVLIST_INV, + 0644, net->can.proc_dir, can_rcvlist_proc_show, + (void *)RX_INV); + net->can.pde_rcvlist_eff = proc_create_net_single(CAN_PROC_RCVLIST_EFF, + 0644, net->can.proc_dir, can_rcvlist_eff_proc_show, NULL); + net->can.pde_rcvlist_sff = proc_create_net_single(CAN_PROC_RCVLIST_SFF, + 0644, net->can.proc_dir, can_rcvlist_sff_proc_show, NULL); } /* diff --git a/net/core/dev.c b/net/core/dev.c index af0558b00c6c..983b277a1229 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2124,7 +2124,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev, int i, j; for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, cpu, j)) + if (!remove_xps_queue(dev_maps, tci, j)) break; } @@ -2884,11 +2884,7 @@ void netdev_rx_csum_fault(struct net_device *dev) EXPORT_SYMBOL(netdev_rx_csum_fault); #endif -/* Actually, we should eliminate this check as soon as we know, that: - * 1. IOMMU is present and allows to map all the memory. - * 2. No high memory really exists on this machine. - */ - +/* XXX: check that highmem exists at all on the given machine. */ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM @@ -2902,20 +2898,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) return 1; } } - - if (PCI_DMA_BUS_IS_PHYS) { - struct device *pdev = dev->dev.parent; - - if (!pdev) - return 0; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - dma_addr_t addr = page_to_phys(skb_frag_page(frag)); - - if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) - return 1; - } - } #endif return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index e77c30ca491d..201ff36b17a8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -481,11 +481,18 @@ do_pass: #define BPF_EMIT_JMP \ do { \ + const s32 off_min = S16_MIN, off_max = S16_MAX; \ + s32 off; \ + \ if (target >= len || target < 0) \ goto err; \ - insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ /* Adjust pc relative offset for 2nd or 3rd insn. */ \ - insn->off -= insn - tmp_insns; \ + off -= insn - tmp_insns; \ + /* Reject anything not fitting into insn->off. */ \ + if (off < off_min || off > off_max) \ + goto err; \ + insn->off = off; \ } while (0) case BPF_JMP | BPF_JA: diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ce519861be59..1fb43bff417d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -59,7 +59,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, struct net_device *dev); #ifdef CONFIG_PROC_FS -static const struct file_operations neigh_stat_seq_fops; +static const struct seq_operations neigh_stat_seq_ops; #endif /* @@ -1558,8 +1558,8 @@ void neigh_table_init(int index, struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops, tbl)) + if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_ops, tbl)) panic("cannot create neighbour proc dir entry"); #endif @@ -2786,7 +2786,7 @@ EXPORT_SYMBOL(neigh_seq_stop); static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { - struct neigh_table *tbl = seq->private; + struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); int cpu; if (*pos == 0) @@ -2803,7 +2803,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct neigh_table *tbl = seq->private; + struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { @@ -2822,7 +2822,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v) static int neigh_stat_seq_show(struct seq_file *seq, void *v) { - struct neigh_table *tbl = seq->private; + struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { @@ -2861,25 +2861,6 @@ static const struct seq_operations neigh_stat_seq_ops = { .stop = neigh_stat_seq_stop, .show = neigh_stat_seq_show, }; - -static int neigh_stat_seq_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &neigh_stat_seq_ops); - - if (!ret) { - struct seq_file *sf = file->private_data; - sf->private = PDE_DATA(inode); - } - return ret; -}; - -static const struct file_operations neigh_stat_seq_fops = { - .open = neigh_stat_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif /* CONFIG_PROC_FS */ static inline size_t neigh_nlmsg_size(void) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 9737302907b1..63881f72ef71 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -175,19 +175,6 @@ static const struct seq_operations dev_seq_ops = { .show = dev_seq_show, }; -static int dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_seq_fops = { - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static const struct seq_operations softnet_seq_ops = { .start = softnet_seq_start, .next = softnet_seq_next, @@ -195,18 +182,6 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); -} - -static const struct file_operations softnet_seq_fops = { - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static void *ptype_get_idx(loff_t pos) { struct packet_type *pt = NULL; @@ -297,30 +272,18 @@ static const struct seq_operations ptype_seq_ops = { .show = ptype_seq_show, }; -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops)) + if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, + sizeof(struct seq_net_private))) goto out; - if (!proc_create("softnet_stat", 0444, net->proc_net, - &softnet_seq_fops)) + if (!proc_create_seq("softnet_stat", 0444, net->proc_net, + &softnet_seq_ops)) goto out_dev; - if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops)) + if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, + sizeof(struct seq_net_private))) goto out_softnet; if (wext_proc_init(net)) @@ -377,22 +340,10 @@ static const struct seq_operations dev_mc_seq_ops = { .show = dev_mc_seq_show, }; -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init dev_mc_net_init(struct net *net) { - if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) + if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops, + sizeof(struct seq_net_private))) return -ENOMEM; return 0; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c476f0794132..bb7e80f4ced3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1214,9 +1214,6 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, cpumask_var_t mask; unsigned long index; - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - index = get_netdev_queue_index(queue); if (dev->num_tc) { @@ -1226,6 +1223,9 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, return -EINVAL; } + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { diff --git a/net/core/sock.c b/net/core/sock.c index 6444525f610c..815770333d91 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1606,7 +1606,7 @@ static void __sk_free(struct sock *sk) if (likely(sk->sk_net_refcnt)) sock_inuse_add(sock_net(sk), -1); - if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) + if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) sock_diag_broadcast_destroy(sk); else sk_destruct(sk); @@ -3439,22 +3439,10 @@ static const struct seq_operations proto_seq_ops = { .show = proto_seq_show, }; -static int proto_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &proto_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations proto_seq_fops = { - .open = proto_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static __net_init int proto_init_net(struct net *net) { - if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops)) + if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops, + sizeof(struct seq_net_private))) return -ENOMEM; return 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 84cd4e3fd01b..0d56e36a6db7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -283,9 +283,7 @@ int dccp_disconnect(struct sock *sk, int flags) dccp_clear_xmit_timers(sk); ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = NULL; - dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 32751602767f..7d6ff983ba2c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2314,19 +2314,6 @@ static const struct seq_operations dn_socket_seq_ops = { .stop = dn_socket_seq_stop, .show = dn_socket_seq_show, }; - -static int dn_socket_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &dn_socket_seq_ops, - sizeof(struct dn_iter_state)); -} - -static const struct file_operations dn_socket_seq_fops = { - .open = dn_socket_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; #endif static const struct net_proto_family dn_family_ops = { @@ -2383,7 +2370,9 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops); + proc_create_seq_private("decnet", 0444, init_net.proc_net, + &dn_socket_seq_ops, sizeof(struct dn_iter_state), + NULL); dn_register_sysctl(); out: return rc; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c03b046478c3..bfd43e8f2c06 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1382,19 +1382,6 @@ static const struct seq_operations dn_dev_seq_ops = { .stop = dn_dev_seq_stop, .show = dn_dev_seq_show, }; - -static int dn_dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &dn_dev_seq_ops); -} - -static const struct file_operations dn_dev_seq_fops = { - .open = dn_dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif /* CONFIG_PROC_FS */ static int addr[2]; @@ -1424,7 +1411,7 @@ void __init dn_dev_init(void) rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); - proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops); + proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops); #ifdef CONFIG_SYSCTL { diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 13156165afa3..94b306f6d551 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -589,27 +589,13 @@ static const struct seq_operations dn_neigh_seq_ops = { .stop = neigh_seq_stop, .show = dn_neigh_seq_show, }; - -static int dn_neigh_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dn_neigh_seq_ops, - sizeof(struct neigh_seq_state)); -} - -static const struct file_operations dn_neigh_seq_fops = { - .open = dn_neigh_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif void __init dn_neigh_init(void) { neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table); - proc_create("decnet_neigh", 0444, init_net.proc_net, - &dn_neigh_seq_fops); + proc_create_net("decnet_neigh", 0444, init_net.proc_net, + &dn_neigh_seq_ops, sizeof(struct neigh_seq_state)); } void __exit dn_neigh_cleanup(void) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index eca0cc6b761f..e74765024d88 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1852,20 +1852,6 @@ static const struct seq_operations dn_rt_cache_seq_ops = { .stop = dn_rt_cache_seq_stop, .show = dn_rt_cache_seq_show, }; - -static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &dn_rt_cache_seq_ops, - sizeof(struct dn_rt_cache_iter_state)); -} - -static const struct file_operations dn_rt_cache_seq_fops = { - .open = dn_rt_cache_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - #endif /* CONFIG_PROC_FS */ void __init dn_route_init(void) @@ -1918,8 +1904,9 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_create("decnet_cache", 0444, init_net.proc_net, - &dn_rt_cache_seq_fops); + proc_create_seq_private("decnet_cache", 0444, init_net.proc_net, + &dn_rt_cache_seq_ops, + sizeof(struct dn_rt_cache_iter_state), NULL); #ifdef CONFIG_DECNET_ROUTER rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index adf50fbc4c13..47725250b4ca 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) static int dsa_port_setup(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; - int err; + int err = 0; memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index); + if (dp->type != DSA_PORT_TYPE_UNUSED) + err = devlink_port_register(ds->devlink, &dp->devlink_port, + dp->index); if (err) return err; @@ -293,7 +295,8 @@ static int dsa_port_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { - devlink_port_unregister(&dp->devlink_port); + if (dp->type != DSA_PORT_TYPE_UNUSED) + devlink_port_unregister(&dp->devlink_port); switch (dp->type) { case DSA_PORT_TYPE_UNUSED: diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index bf6c2d4d4fdc..e90c89ef8c08 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1418,23 +1418,12 @@ static const struct seq_operations arp_seq_ops = { .show = arp_seq_show, }; -static int arp_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &arp_seq_ops, - sizeof(struct neigh_seq_state)); -} - -static const struct file_operations arp_seq_fops = { - .open = arp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - +/* ------------------------------------------------------------------------ */ static int __net_init arp_net_init(struct net *net) { - if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops)) + if (!proc_create_net("arp", 0444, net->proc_net, &arp_seq_ops, + sizeof(struct neigh_seq_state))) return -ENOMEM; return 0; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f05afaf3235c..e66172aaf241 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -326,10 +326,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { + struct net *net = dev_net(dev); + struct flow_keys flkeys; int ret, no_addr; struct fib_result res; struct flowi4 fl4; - struct net *net = dev_net(dev); bool dev_match; fl4.flowi4_oif = 0; @@ -347,6 +348,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, no_addr = idev->ifa_list == NULL; fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; + if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) { + fl4.flowi4_proto = 0; + fl4.fl4_sport = 0; + fl4.fl4_dport = 0; + } trace_fib_validate_source(dev, &fl4); @@ -643,6 +649,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3dcffd3ce98c..99c23a0cb8ca 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2348,18 +2348,6 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) return 0; } -static int fib_triestat_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, fib_triestat_seq_show); -} - -static const struct file_operations fib_triestat_fops = { - .open = fib_triestat_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; @@ -2533,19 +2521,6 @@ static const struct seq_operations fib_trie_seq_ops = { .show = fib_trie_seq_show, }; -static int fib_trie_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &fib_trie_seq_ops, - sizeof(struct fib_trie_iter)); -} - -static const struct file_operations fib_trie_fops = { - .open = fib_trie_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - struct fib_route_iter { struct seq_net_private p; struct fib_table *main_tb; @@ -2726,29 +2701,18 @@ static const struct seq_operations fib_route_seq_ops = { .show = fib_route_seq_show, }; -static int fib_route_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &fib_route_seq_ops, - sizeof(struct fib_route_iter)); -} - -static const struct file_operations fib_route_fops = { - .open = fib_route_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - int __net_init fib_proc_init(struct net *net) { - if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops)) + if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, + sizeof(struct fib_trie_iter))) goto out1; - if (!proc_create("fib_triestat", 0444, net->proc_net, - &fib_triestat_fops)) + if (!proc_create_net_single("fib_triestat", 0444, net->proc_net, + fib_triestat_seq_show, NULL)) goto out2; - if (!proc_create("route", 0444, net->proc_net, &fib_route_fops)) + if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops, + sizeof(struct fib_route_iter))) goto out3; return 0; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b26a81a7de42..85b617b655bc 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2829,19 +2829,6 @@ static const struct seq_operations igmp_mc_seq_ops = { .show = igmp_mc_seq_show, }; -static int igmp_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &igmp_mc_seq_ops, - sizeof(struct igmp_mc_iter_state)); -} - -static const struct file_operations igmp_mc_seq_fops = { - .open = igmp_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - struct igmp_mcf_iter_state { struct seq_net_private p; struct net_device *dev; @@ -2975,29 +2962,17 @@ static const struct seq_operations igmp_mcf_seq_ops = { .show = igmp_mcf_seq_show, }; -static int igmp_mcf_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &igmp_mcf_seq_ops, - sizeof(struct igmp_mcf_iter_state)); -} - -static const struct file_operations igmp_mcf_seq_fops = { - .open = igmp_mcf_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; int err; - pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops); + pde = proc_create_net("igmp", 0444, net->proc_net, &igmp_mc_seq_ops, + sizeof(struct igmp_mc_iter_state)); if (!pde) goto out_igmp; - pde = proc_create("mcfilter", 0444, net->proc_net, - &igmp_mcf_seq_fops); + pde = proc_create_net("mcfilter", 0444, net->proc_net, + &igmp_mcf_seq_ops, sizeof(struct igmp_mcf_iter_state)); if (!pde) goto out_mcfilter; err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9c169bb2444d..f200b304f76c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -722,10 +722,12 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); - else + else if (tunnel->erspan_ver == 2) erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); + else + goto free_skb; tunnel->parms.o_flags &= ~TUNNEL_KEY; __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 83c73bab2c3d..d54abc097800 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1045,7 +1045,8 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG) && + skb_tailroom(skb) >= copy) { unsigned int off; off = skb->len; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5ad2d8ed3a3f..57bbb060faaf 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -505,8 +505,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int err; int copied; - WARN_ON_ONCE(sk->sk_family == AF_INET6); - err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (!skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6b0e362cc99b..38d906baf1df 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -328,7 +328,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (tdev) { hlen = tdev->hard_header_len + tdev->needed_headroom; - mtu = tdev->mtu; + mtu = min(tdev->mtu, IP_MAX_MTU); } dev->needed_headroom = t_hlen + hlen; @@ -362,7 +362,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, nt = netdev_priv(dev); t_hlen = nt->hlen + sizeof(struct iphdr); dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; + dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; ip_tunnel_add(itn, nt); return nt; @@ -930,7 +930,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); - int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; + int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; if (new_mtu < ETH_MIN_MTU) return -EINVAL; @@ -1107,7 +1107,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], mtu = ip_tunnel_bind_dev(dev); if (tb[IFLA_MTU]) { - unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen; + unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen; mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, (unsigned int)(max - sizeof(struct iphdr))); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 43f620feb1c4..bbcbcc113d19 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1282,18 +1282,6 @@ static int pnp_seq_show(struct seq_file *seq, void *v) &ic_servaddr); return 0; } - -static int pnp_seq_open(struct inode *indoe, struct file *file) -{ - return single_open(file, pnp_seq_show, NULL); -} - -static const struct file_operations pnp_seq_fops = { - .open = pnp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ /* @@ -1369,7 +1357,7 @@ static int __init ip_auto_config(void) unsigned int i; #ifdef CONFIG_PROC_FS - proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops); + proc_create_single("pnp", 0444, init_net.proc_net, pnp_seq_show); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2fb4de3f7f66..37c4f885ff7b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2828,19 +2828,6 @@ static const struct seq_operations ipmr_vif_seq_ops = { .show = ipmr_vif_seq_show, }; -static int ipmr_vif_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ipmr_vif_seq_ops, - sizeof(struct mr_vif_iter)); -} - -static const struct file_operations ipmr_vif_fops = { - .open = ipmr_vif_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); @@ -2900,19 +2887,6 @@ static const struct seq_operations ipmr_mfc_seq_ops = { .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; - -static int ipmr_mfc_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ipmr_mfc_seq_ops, - sizeof(struct mr_mfc_iter)); -} - -static const struct file_operations ipmr_mfc_fops = { - .open = ipmr_mfc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; #endif #ifdef CONFIG_IP_PIMSM_V2 @@ -2977,9 +2951,11 @@ static int __net_init ipmr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops)) + if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops, + sizeof(struct mr_vif_iter))) goto proc_vif_fail; - if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops)) + if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, + sizeof(struct mr_mfc_iter))) goto proc_cache_fail; #endif return 0; diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 4fe97723b53f..30221701614c 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -43,7 +43,10 @@ mr_table_alloc(struct net *net, u32 id, write_pnet(&mrt->net, net); mrt->ops = *ops; - rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params); + if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) { + kfree(mrt); + return NULL; + } INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 44b308d93ec2..e85f35b89c49 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -34,6 +34,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); +MODULE_ALIAS("ipt_icmp"); void *ipt_alloc_initial_table(const struct xt_table *info) { diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index fd01f13c896a..12843c9ef142 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,10 +89,10 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return true ^ invert; } + memset(&flow, 0, sizeof(flow)); flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); - flow.flowi4_oif = 0; flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 56a010622f70..2ed64bca54e3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1150,58 +1150,24 @@ static int ping_v4_seq_show(struct seq_file *seq, void *v) return 0; } -static int ping_seq_open(struct inode *inode, struct file *file) -{ - struct ping_seq_afinfo *afinfo = PDE_DATA(inode); - return seq_open_net(inode, file, &afinfo->seq_ops, - sizeof(struct ping_iter_state)); -} - -const struct file_operations ping_seq_fops = { - .open = ping_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; -EXPORT_SYMBOL_GPL(ping_seq_fops); - -static struct ping_seq_afinfo ping_v4_seq_afinfo = { - .name = "icmp", - .family = AF_INET, - .seq_fops = &ping_seq_fops, - .seq_ops = { - .start = ping_v4_seq_start, - .show = ping_v4_seq_show, - .next = ping_seq_next, - .stop = ping_seq_stop, - }, +static const struct seq_operations ping_v4_seq_ops = { + .start = ping_v4_seq_start, + .show = ping_v4_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, }; -int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) +static int __net_init ping_v4_proc_init_net(struct net *net) { - struct proc_dir_entry *p; - p = proc_create_data(afinfo->name, 0444, net->proc_net, - afinfo->seq_fops, afinfo); - if (!p) + if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops, + sizeof(struct ping_iter_state))) return -ENOMEM; return 0; } -EXPORT_SYMBOL_GPL(ping_proc_register); - -void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo) -{ - remove_proc_entry(afinfo->name, net->proc_net); -} -EXPORT_SYMBOL_GPL(ping_proc_unregister); - -static int __net_init ping_v4_proc_init_net(struct net *net) -{ - return ping_proc_register(net, &ping_v4_seq_afinfo); -} static void __net_exit ping_v4_proc_exit_net(struct net *net) { - ping_proc_unregister(net, &ping_v4_seq_afinfo); + remove_proc_entry("icmp", net->proc_net); } static struct pernet_operations ping_v4_net_ops = { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a058de677e94..573e43c8ed87 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -77,18 +77,6 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) return 0; } -static int sockstat_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, sockstat_seq_show); -} - -static const struct file_operations sockstat_seq_fops = { - .open = sockstat_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - /* snmp items */ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS), @@ -460,20 +448,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v) return 0; } -static int snmp_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, snmp_seq_show); -} - -static const struct file_operations snmp_seq_fops = { - .open = snmp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - - - /* * Output /proc/net/netstat */ @@ -507,26 +481,16 @@ static int netstat_seq_show(struct seq_file *seq, void *v) return 0; } -static int netstat_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, netstat_seq_show); -} - -static const struct file_operations netstat_seq_fops = { - .open = netstat_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - static __net_init int ip_proc_init_net(struct net *net) { - if (!proc_create("sockstat", 0444, net->proc_net, - &sockstat_seq_fops)) + if (!proc_create_net_single("sockstat", 0444, net->proc_net, + sockstat_seq_show, NULL)) goto out_sockstat; - if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops)) + if (!proc_create_net_single("netstat", 0444, net->proc_net, + netstat_seq_show, NULL)) goto out_netstat; - if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops)) + if (!proc_create_net_single("snmp", 0444, net->proc_net, snmp_seq_show, + NULL)) goto out_snmp; return 0; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1b4d3355624a..abb3c9490c55 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1003,11 +1003,12 @@ struct proto raw_prot = { static struct sock *raw_get_first(struct seq_file *seq) { struct sock *sk; + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - sk_for_each(sk, &state->h->ht[state->bucket]) + sk_for_each(sk, &h->ht[state->bucket]) if (sock_net(sk) == seq_file_net(seq)) goto found; } @@ -1018,6 +1019,7 @@ found: static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); do { @@ -1027,7 +1029,7 @@ try_again: } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { - sk = sk_head(&state->h->ht[state->bucket]); + sk = sk_head(&h->ht[state->bucket]); goto try_again; } return sk; @@ -1045,9 +1047,9 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) void *raw_seq_start(struct seq_file *seq, loff_t *pos) { - struct raw_iter_state *state = raw_seq_private(seq); + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); - read_lock(&state->h->lock); + read_lock(&h->lock); return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1067,9 +1069,9 @@ EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) { - struct raw_iter_state *state = raw_seq_private(seq); + struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); - read_unlock(&state->h->lock); + read_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_seq_stop); @@ -1110,37 +1112,10 @@ static const struct seq_operations raw_seq_ops = { .show = raw_seq_show, }; -int raw_seq_open(struct inode *ino, struct file *file, - struct raw_hashinfo *h, const struct seq_operations *ops) -{ - int err; - struct raw_iter_state *i; - - err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state)); - if (err < 0) - return err; - - i = raw_seq_private((struct seq_file *)file->private_data); - i->h = h; - return 0; -} -EXPORT_SYMBOL_GPL(raw_seq_open); - -static int raw_v4_seq_open(struct inode *inode, struct file *file) -{ - return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops); -} - -static const struct file_operations raw_seq_fops = { - .open = raw_v4_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static __net_init int raw_init_net(struct net *net) { - if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops)) + if (!proc_create_net_data("raw", 0444, net->proc_net, &raw_seq_ops, + sizeof(struct raw_iter_state), &raw_v4_hashinfo)) return -ENOMEM; return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29268efad247..75fb8864be67 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -360,18 +360,6 @@ static int rt_acct_proc_show(struct seq_file *m, void *v) kfree(dst); return 0; } - -static int rt_acct_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rt_acct_proc_show, NULL); -} - -static const struct file_operations rt_acct_proc_fops = { - .open = rt_acct_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif static int __net_init ip_rt_do_proc_init(struct net *net) @@ -389,7 +377,8 @@ static int __net_init ip_rt_do_proc_init(struct net *net) goto err2; #ifdef CONFIG_IP_ROUTE_CLASSID - pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); + pde = proc_create_single("rt_acct", 0, net->proc_net, + rt_acct_proc_show); if (!pde) goto err3; #endif @@ -1961,8 +1950,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); - if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; + } else { + fl4.flowi4_proto = 0; + fl4.fl4_sport = 0; + fl4.fl4_dport = 0; + } err = fib_lookup(net, &fl4, res, 0); if (err != 0) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f70586b50838..2c970626b398 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1961,6 +1961,7 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); */ static void *listening_get_next(struct seq_file *seq, void *cur) { + struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; @@ -1983,7 +1984,7 @@ get_sk: sk_for_each_from(sk) { if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_family == st->family) + if (sk->sk_family == afinfo->family) return sk; } spin_unlock(&ilb->lock); @@ -2020,6 +2021,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) */ static void *established_get_first(struct seq_file *seq) { + struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; @@ -2036,7 +2038,7 @@ static void *established_get_first(struct seq_file *seq) spin_lock_bh(lock); sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if (sk->sk_family != st->family || + if (sk->sk_family != afinfo->family || !net_eq(sock_net(sk), net)) { continue; } @@ -2051,6 +2053,7 @@ out: static void *established_get_next(struct seq_file *seq, void *cur) { + struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct sock *sk = cur; struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; @@ -2062,7 +2065,8 @@ static void *established_get_next(struct seq_file *seq, void *cur) sk = sk_nulls_next(sk); sk_nulls_for_each_from(sk, node) { - if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) + if (sk->sk_family == afinfo->family && + net_eq(sock_net(sk), net)) return sk; } @@ -2135,7 +2139,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) return rc; } -static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) +void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { struct tcp_iter_state *st = seq->private; void *rc; @@ -2156,8 +2160,9 @@ out: st->last_pos = *pos; return rc; } +EXPORT_SYMBOL(tcp_seq_start); -static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct tcp_iter_state *st = seq->private; void *rc = NULL; @@ -2186,8 +2191,9 @@ out: st->last_pos = *pos; return rc; } +EXPORT_SYMBOL(tcp_seq_next); -static void tcp_seq_stop(struct seq_file *seq, void *v) +void tcp_seq_stop(struct seq_file *seq, void *v) { struct tcp_iter_state *st = seq->private; @@ -2202,47 +2208,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) break; } } - -int tcp_seq_open(struct inode *inode, struct file *file) -{ - struct tcp_seq_afinfo *afinfo = PDE_DATA(inode); - struct tcp_iter_state *s; - int err; - - err = seq_open_net(inode, file, &afinfo->seq_ops, - sizeof(struct tcp_iter_state)); - if (err < 0) - return err; - - s = ((struct seq_file *)file->private_data)->private; - s->family = afinfo->family; - s->last_pos = 0; - return 0; -} -EXPORT_SYMBOL(tcp_seq_open); - -int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) -{ - int rc = 0; - struct proc_dir_entry *p; - - afinfo->seq_ops.start = tcp_seq_start; - afinfo->seq_ops.next = tcp_seq_next; - afinfo->seq_ops.stop = tcp_seq_stop; - - p = proc_create_data(afinfo->name, 0444, net->proc_net, - afinfo->seq_fops, afinfo); - if (!p) - rc = -ENOMEM; - return rc; -} -EXPORT_SYMBOL(tcp_proc_register); - -void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) -{ - remove_proc_entry(afinfo->name, net->proc_net); -} -EXPORT_SYMBOL(tcp_proc_unregister); +EXPORT_SYMBOL(tcp_seq_stop); static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i) @@ -2377,30 +2343,28 @@ out: return 0; } -static const struct file_operations tcp_afinfo_seq_fops = { - .open = tcp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net +static const struct seq_operations tcp4_seq_ops = { + .show = tcp4_seq_show, + .start = tcp_seq_start, + .next = tcp_seq_next, + .stop = tcp_seq_stop, }; static struct tcp_seq_afinfo tcp4_seq_afinfo = { - .name = "tcp", .family = AF_INET, - .seq_fops = &tcp_afinfo_seq_fops, - .seq_ops = { - .show = tcp4_seq_show, - }, }; static int __net_init tcp4_proc_init_net(struct net *net) { - return tcp_proc_register(net, &tcp4_seq_afinfo); + if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops, + sizeof(struct tcp_iter_state), &tcp4_seq_afinfo)) + return -ENOMEM; + return 0; } static void __net_exit tcp4_proc_exit_net(struct net *net) { - tcp_proc_unregister(net, &tcp4_seq_afinfo); + remove_proc_entry("tcp", net->proc_net); } static struct pernet_operations tcp4_net_ops = { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383cac0ff0ec..d07e34f8e309 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2833,8 +2833,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) return -EBUSY; if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { - if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - BUG(); + if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { + WARN_ON_ONCE(1); + return -EINVAL; + } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } @@ -3342,6 +3344,7 @@ static void tcp_connect_init(struct sock *sk) sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); + tcp_write_queue_purge(sk); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b61a770884fa..051a43ff3fb8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2582,12 +2582,13 @@ EXPORT_SYMBOL(udp_prot); static struct sock *udp_get_first(struct seq_file *seq, int start) { struct sock *sk; + struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - for (state->bucket = start; state->bucket <= state->udp_table->mask; + for (state->bucket = start; state->bucket <= afinfo->udp_table->mask; ++state->bucket) { - struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; + struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket]; if (hlist_empty(&hslot->head)) continue; @@ -2596,7 +2597,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) sk_for_each(sk, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_family == state->family) + if (sk->sk_family == afinfo->family) goto found; } spin_unlock_bh(&hslot->lock); @@ -2608,16 +2609,17 @@ found: static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { + struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); do { sk = sk_next(sk); - } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); + } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family)); if (!sk) { - if (state->bucket <= state->udp_table->mask) - spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); + if (state->bucket <= afinfo->udp_table->mask) + spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } return sk; @@ -2633,15 +2635,16 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *udp_seq_start(struct seq_file *seq, loff_t *pos) +void *udp_seq_start(struct seq_file *seq, loff_t *pos) { struct udp_iter_state *state = seq->private; state->bucket = MAX_UDP_PORTS; return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +EXPORT_SYMBOL(udp_seq_start); -static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -2653,56 +2656,17 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } +EXPORT_SYMBOL(udp_seq_next); -static void udp_seq_stop(struct seq_file *seq, void *v) +void udp_seq_stop(struct seq_file *seq, void *v) { + struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct udp_iter_state *state = seq->private; - if (state->bucket <= state->udp_table->mask) - spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); + if (state->bucket <= afinfo->udp_table->mask) + spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); } - -int udp_seq_open(struct inode *inode, struct file *file) -{ - struct udp_seq_afinfo *afinfo = PDE_DATA(inode); - struct udp_iter_state *s; - int err; - - err = seq_open_net(inode, file, &afinfo->seq_ops, - sizeof(struct udp_iter_state)); - if (err < 0) - return err; - - s = ((struct seq_file *)file->private_data)->private; - s->family = afinfo->family; - s->udp_table = afinfo->udp_table; - return err; -} -EXPORT_SYMBOL(udp_seq_open); - -/* ------------------------------------------------------------------------ */ -int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) -{ - struct proc_dir_entry *p; - int rc = 0; - - afinfo->seq_ops.start = udp_seq_start; - afinfo->seq_ops.next = udp_seq_next; - afinfo->seq_ops.stop = udp_seq_stop; - - p = proc_create_data(afinfo->name, 0444, net->proc_net, - afinfo->seq_fops, afinfo); - if (!p) - rc = -ENOMEM; - return rc; -} -EXPORT_SYMBOL(udp_proc_register); - -void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) -{ - remove_proc_entry(afinfo->name, net->proc_net); -} -EXPORT_SYMBOL(udp_proc_unregister); +EXPORT_SYMBOL(udp_seq_stop); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, @@ -2742,32 +2706,30 @@ int udp4_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct file_operations udp_afinfo_seq_fops = { - .open = udp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net +const struct seq_operations udp_seq_ops = { + .start = udp_seq_start, + .next = udp_seq_next, + .stop = udp_seq_stop, + .show = udp4_seq_show, }; +EXPORT_SYMBOL(udp_seq_ops); -/* ------------------------------------------------------------------------ */ static struct udp_seq_afinfo udp4_seq_afinfo = { - .name = "udp", .family = AF_INET, .udp_table = &udp_table, - .seq_fops = &udp_afinfo_seq_fops, - .seq_ops = { - .show = udp4_seq_show, - }, }; static int __net_init udp4_proc_init_net(struct net *net) { - return udp_proc_register(net, &udp4_seq_afinfo); + if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops, + sizeof(struct udp_iter_state), &udp4_seq_afinfo)) + return -ENOMEM; + return 0; } static void __net_exit udp4_proc_exit_net(struct net *net) { - udp_proc_unregister(net, &udp4_seq_afinfo); + remove_proc_entry("udp", net->proc_net); } static struct pernet_operations udp4_net_ops = { diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index f96614e9b9a5..8545457752fb 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "UDPLite: " fmt #include <linux/export.h> +#include <linux/proc_fs.h> #include "udp_impl.h" struct udp_table udplite_table __read_mostly; @@ -73,32 +74,22 @@ static struct inet_protosw udplite4_protosw = { }; #ifdef CONFIG_PROC_FS - -static const struct file_operations udplite_afinfo_seq_fops = { - .open = udp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net -}; - static struct udp_seq_afinfo udplite4_seq_afinfo = { - .name = "udplite", .family = AF_INET, .udp_table = &udplite_table, - .seq_fops = &udplite_afinfo_seq_fops, - .seq_ops = { - .show = udp4_seq_show, - }, }; static int __net_init udplite4_proc_init_net(struct net *net) { - return udp_proc_register(net, &udplite4_seq_afinfo); + if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops, + sizeof(struct udp_iter_state), &udplite4_seq_afinfo)) + return -ENOMEM; + return 0; } static void __net_exit udplite4_proc_exit_net(struct net *net) { - udp_proc_unregister(net, &udplite4_seq_afinfo); + remove_proc_entry("udplite", net->proc_net); } static struct pernet_operations udplite4_net_ops = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 78cef00c9596..1b5ea3379d9b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4254,22 +4254,10 @@ static const struct seq_operations if6_seq_ops = { .stop = if6_seq_stop, }; -static int if6_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &if6_seq_ops, - sizeof(struct if6_iter_state)); -} - -static const struct file_operations if6_fops = { - .open = if6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init if6_proc_net_init(struct net *net) { - if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops)) + if (!proc_create_net("if_inet6", 0444, net->proc_net, &if6_seq_ops, + sizeof(struct if6_iter_state))) return -ENOMEM; return 0; } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index bbcabbba9bd8..ebeaf47d5c8d 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -529,22 +529,10 @@ static const struct seq_operations ac6_seq_ops = { .show = ac6_seq_show, }; -static int ac6_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ac6_seq_ops, - sizeof(struct ac6_iter_state)); -} - -static const struct file_operations ac6_seq_fops = { - .open = ac6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - int __net_init ac6_proc_init(struct net *net) { - if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops)) + if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops, + sizeof(struct ac6_iter_state))) return -ENOMEM; return 0; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index deab2db6692e..01372dd74e38 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2209,15 +2209,6 @@ void fib6_gc_cleanup(void) } #ifdef CONFIG_PROC_FS - -struct ipv6_route_iter { - struct seq_net_private p; - struct fib6_walker w; - loff_t skip; - struct fib6_table *tbl; - int sernum; -}; - static int ipv6_route_seq_show(struct seq_file *seq, void *v) { struct rt6_info *rt = v; @@ -2383,17 +2374,10 @@ static void ipv6_route_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock_bh(); } -static const struct seq_operations ipv6_route_seq_ops = { +const struct seq_operations ipv6_route_seq_ops = { .start = ipv6_route_seq_start, .next = ipv6_route_seq_next, .stop = ipv6_route_seq_stop, .show = ipv6_route_seq_show }; - -int ipv6_route_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ipv6_route_seq_ops, - sizeof(struct ipv6_route_iter)); -} - #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c05c4e82a7ca..3eee7637bdfe 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -754,6 +754,10 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + + state->pid_ns = proc_pid_ns(file_inode(seq->file)); + rcu_read_lock_bh(); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -808,44 +812,10 @@ static const struct seq_operations ip6fl_seq_ops = { .show = ip6fl_seq_show, }; -static int ip6fl_seq_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - struct ip6fl_iter_state *state; - int err; - - err = seq_open_net(inode, file, &ip6fl_seq_ops, - sizeof(struct ip6fl_iter_state)); - - if (!err) { - seq = file->private_data; - state = ip6fl_seq_private(seq); - rcu_read_lock(); - state->pid_ns = get_pid_ns(task_active_pid_ns(current)); - rcu_read_unlock(); - } - return err; -} - -static int ip6fl_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct ip6fl_iter_state *state = ip6fl_seq_private(seq); - put_pid_ns(state->pid_ns); - return seq_release_net(inode, file); -} - -static const struct file_operations ip6fl_seq_fops = { - .open = ip6fl_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = ip6fl_seq_release, -}; - static int __net_init ip6_flowlabel_proc_init(struct net *net) { - if (!proc_create("ip6_flowlabel", 0444, net->proc_net, - &ip6fl_seq_fops)) + if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net, + &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state))) return -ENOMEM; return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 69727bc168cb..458de353f5d9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -71,6 +71,7 @@ struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct ip6_tnl __rcu *collect_md_tun; + struct ip6_tnl __rcu *collect_md_tun_erspan; struct net_device *fb_tunnel_dev; }; @@ -81,6 +82,7 @@ static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); +static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu); /* Tunnel hash table */ @@ -232,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (cand) return cand; - t = rcu_dereference(ign->collect_md_tun); + if (gre_proto == htons(ETH_P_ERSPAN) || + gre_proto == htons(ETH_P_ERSPAN2)) + t = rcu_dereference(ign->collect_md_tun_erspan); + else + t = rcu_dereference(ign->collect_md_tun); + if (t && t->dev->flags & IFF_UP) return t; @@ -261,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, return &ign->tunnels[prio][h]; } +static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, t); +} + +static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun_erspan, t); +} + +static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, NULL); +} + +static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign, + struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun_erspan, NULL); +} + static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, const struct ip6_tnl *t) { @@ -271,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); - if (t->parms.collect_md) - rcu_assign_pointer(ign->collect_md_tun, t); - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -283,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; - if (t->parms.collect_md) - rcu_assign_pointer(ign->collect_md_tun, NULL); - for (tp = ip6gre_bucket(ign, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -374,11 +400,23 @@ failed_free: return NULL; } +static void ip6erspan_tunnel_uninit(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + + ip6erspan_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + dst_cache_reset(&t->dst_cache); + dev_put(dev); +} + static void ip6gre_tunnel_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); dst_cache_reset(&t->dst_cache); dev_put(dev); @@ -698,6 +736,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; + if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) + return -ENOMEM; + /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; @@ -908,7 +949,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } - if (skb_cow_head(skb, dev->needed_headroom)) + if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; t->parms.o_flags &= ~TUNNEL_KEY; @@ -979,11 +1020,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, erspan_build_header(skb, ntohl(t->parms.o_key), t->parms.index, truncate, false); - else + else if (t->parms.erspan_ver == 2) erspan_build_header_v2(skb, ntohl(t->parms.o_key), t->parms.dir, t->parms.hwid, truncate, false); + else + goto tx_err; + fl6.daddr = t->parms.raddr; } @@ -1019,12 +1063,11 @@ tx_err: return NETDEV_TX_OK; } -static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; - int t_hlen; if (dev->type != ARPHRD_ETHER) { memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1051,12 +1094,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; +} - t->tun_hlen = gre_calc_hlen(t->parms.o_flags); - - t->hlen = t->encap_hlen + t->tun_hlen; - - t_hlen = t->hlen + sizeof(struct ipv6hdr); +static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, + int t_hlen) +{ + const struct __ip6_tnl_parm *p = &t->parms; + struct net_device *dev = t->dev; if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -1088,8 +1132,26 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) } } -static int ip6gre_tnl_change(struct ip6_tnl *t, - const struct __ip6_tnl_parm *p, int set_mtu) +static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) +{ + int t_hlen; + + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; + + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + return t_hlen; +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + ip6gre_tnl_link_config_common(t); + ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t)); +} + +static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; @@ -1105,6 +1167,12 @@ static int ip6gre_tnl_change(struct ip6_tnl *t, t->parms.o_flags = p->o_flags; t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, + int set_mtu) +{ + ip6gre_tnl_copy_tnl_parm(t, p); ip6gre_tnl_link_config(t, set_mtu); return 0; } @@ -1381,11 +1449,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) return ret; } - tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); - tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; - t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - - dev->hard_header_len = LL_MAX_HEADER + t_hlen; + t_hlen = ip6gre_calc_hlen(tunnel); dev->mtu = ETH_DATA_LEN - t_hlen; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; @@ -1728,6 +1792,19 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel) +{ + int t_hlen; + + tunnel->tun_hlen = 8; + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + erspan_hdr_len(tunnel->parms.erspan_ver); + + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + return t_hlen; +} + static int ip6erspan_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1751,12 +1828,7 @@ static int ip6erspan_tap_init(struct net_device *dev) return ret; } - tunnel->tun_hlen = 8; - tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + - erspan_hdr_len(tunnel->parms.erspan_ver); - t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - - dev->hard_header_len = LL_MAX_HEADER + t_hlen; + t_hlen = ip6erspan_calc_hlen(tunnel); dev->mtu = ETH_DATA_LEN - t_hlen; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; @@ -1764,14 +1836,14 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->mtu -= 8; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - ip6gre_tnl_link_config(tunnel, 1); + ip6erspan_tnl_link_config(tunnel, 1); return 0; } static const struct net_device_ops ip6erspan_netdev_ops = { .ndo_init = ip6erspan_tap_init, - .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_uninit = ip6erspan_tunnel_uninit, .ndo_start_xmit = ip6erspan_tunnel_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -1835,13 +1907,11 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[], return ret; } -static int ip6gre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip6_tnl *nt; - struct net *net = dev_net(dev); - struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct ip_tunnel_encap ipencap; int err; @@ -1854,16 +1924,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, return err; } - ip6gre_netlink_parms(data, &nt->parms); - - if (nt->parms.collect_md) { - if (rtnl_dereference(ign->collect_md_tun)) - return -EEXIST; - } else { - if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - } - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); @@ -1874,51 +1934,94 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, if (err) goto out; - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - if (tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); dev_hold(dev); - ip6gre_tunnel_link(ign, nt); out: return err; } -static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack) +static int ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6_tnl *nt = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6gre_net *ign; + int err; + + ip6gre_netlink_parms(data, &nt->parms); + ign = net_generic(net, ip6gre_net_id); + + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } + + err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + if (!err) { + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + ip6gre_tunnel_link_md(ign, nt); + ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); + } + return err; +} + +static struct ip6_tnl * +ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], struct __ip6_tnl_parm *p_p, + struct netlink_ext_ack *extack) { struct ip6_tnl *t, *nt = netdev_priv(dev); struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); - struct __ip6_tnl_parm p; struct ip_tunnel_encap ipencap; if (dev == ign->fb_tunnel_dev) - return -EINVAL; + return ERR_PTR(-EINVAL); if (ip6gre_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) - return err; + return ERR_PTR(err); } - ip6gre_netlink_parms(data, &p); + ip6gre_netlink_parms(data, p_p); - t = ip6gre_tunnel_locate(net, &p, 0); + t = ip6gre_tunnel_locate(net, p_p, 0); if (t) { if (t->dev != dev) - return -EEXIST; + return ERR_PTR(-EEXIST); } else { t = nt; } + return t; +} + +static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct __ip6_tnl_parm p; + struct ip6_tnl *t; + + t = ip6gre_changelink_common(dev, tb, data, &p, extack); + if (IS_ERR(t)) + return PTR_ERR(t); + + ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link_md(ign, t); ip6gre_tunnel_link(ign, t); return 0; } @@ -2068,6 +2171,69 @@ static void ip6erspan_tap_setup(struct net_device *dev) netif_keep_dst(dev); } +static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6_tnl *nt = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6gre_net *ign; + int err; + + ip6gre_netlink_parms(data, &nt->parms); + ign = net_generic(net, ip6gre_net_id); + + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun_erspan)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } + + err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + if (!err) { + ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]); + ip6erspan_tunnel_link_md(ign, nt); + ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); + } + return err; +} + +static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + ip6gre_tnl_link_config_common(t); + ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t)); +} + +static int ip6erspan_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + ip6gre_tnl_copy_tnl_parm(t, p); + ip6erspan_tnl_link_config(t, set_mtu); + return 0; +} + +static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct __ip6_tnl_parm p; + struct ip6_tnl *t; + + t = ip6gre_changelink_common(dev, tb, data, &p, extack); + if (IS_ERR(t)) + return PTR_ERR(t); + + ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6erspan_tunnel_link_md(ign, t); + ip6gre_tunnel_link(ign, t); + return 0; +} + static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .kind = "ip6gre", .maxtype = IFLA_GRE_MAX, @@ -2104,8 +2270,8 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = { .priv_size = sizeof(struct ip6_tnl), .setup = ip6erspan_tap_setup, .validate = ip6erspan_tap_validate, - .newlink = ip6gre_newlink, - .changelink = ip6gre_changelink, + .newlink = ip6erspan_newlink, + .changelink = ip6erspan_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, .get_link_net = ip6_tnl_get_link_net, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2e891d2c30ef..7b6d1689087b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1503,7 +1503,8 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG) && + skb_tailroom(skb) >= copy) { unsigned int off; off = skb->len; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index da66aaac51ce..00e138a44cbb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1692,8 +1692,13 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) if (new_mtu < ETH_MIN_MTU) return -EINVAL; } - if (new_mtu > 0xFFF8 - dev->hard_header_len) - return -EINVAL; + if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { + if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) + return -EINVAL; + } else { + if (new_mtu > IP_MAX_MTU - dev->hard_header_len) + return -EINVAL; + } dev->mtu = new_mtu; return 0; } @@ -1841,7 +1846,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = 0xFFF8 - dev->hard_header_len; + dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 298fd8b6ed17..4a15529d33eb 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -439,19 +439,6 @@ static const struct seq_operations ip6mr_vif_seq_ops = { .show = ip6mr_vif_seq_show, }; -static int ip6mr_vif_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ip6mr_vif_seq_ops, - sizeof(struct mr_vif_iter)); -} - -static const struct file_operations ip6mr_vif_fops = { - .open = ip6mr_vif_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); @@ -512,19 +499,6 @@ static const struct seq_operations ipmr_mfc_seq_ops = { .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; - -static int ipmr_mfc_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ipmr_mfc_seq_ops, - sizeof(struct mr_mfc_iter)); -} - -static const struct file_operations ip6mr_mfc_fops = { - .open = ipmr_mfc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; #endif #ifdef CONFIG_IPV6_PIMSM_V2 @@ -1316,9 +1290,11 @@ static int __net_init ip6mr_net_init(struct net *net) #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops)) + if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops, + sizeof(struct mr_vif_iter))) goto proc_vif_fail; - if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops)) + if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, + sizeof(struct mr_mfc_iter))) goto proc_cache_fail; #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 793159d77d8a..975021df7c1c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2749,19 +2749,6 @@ static const struct seq_operations igmp6_mc_seq_ops = { .show = igmp6_mc_seq_show, }; -static int igmp6_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &igmp6_mc_seq_ops, - sizeof(struct igmp6_mc_iter_state)); -} - -static const struct file_operations igmp6_mc_seq_fops = { - .open = igmp6_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - struct igmp6_mcf_iter_state { struct seq_net_private p; struct net_device *dev; @@ -2903,28 +2890,17 @@ static const struct seq_operations igmp6_mcf_seq_ops = { .show = igmp6_mcf_seq_show, }; -static int igmp6_mcf_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &igmp6_mcf_seq_ops, - sizeof(struct igmp6_mcf_iter_state)); -} - -static const struct file_operations igmp6_mcf_seq_fops = { - .open = igmp6_mcf_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init igmp6_proc_init(struct net *net) { int err; err = -ENOMEM; - if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops)) + if (!proc_create_net("igmp6", 0444, net->proc_net, &igmp6_mc_seq_ops, + sizeof(struct igmp6_mc_iter_state))) goto out; - if (!proc_create("mcfilter6", 0444, net->proc_net, - &igmp6_mcf_seq_fops)) + if (!proc_create_net("mcfilter6", 0444, net->proc_net, + &igmp6_mcf_seq_ops, + sizeof(struct igmp6_mcf_iter_state))) goto out_proc_net_igmp6; err = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 65c9e1a58305..97f79dc943d7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -38,6 +38,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); +MODULE_ALIAS("ip6t_icmp6"); void *ip6t_alloc_initial_table(const struct xt_table *info) { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 746eeae7f581..96f56bf49a30 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -24,6 +24,7 @@ #include <net/protocol.h> #include <net/udp.h> #include <net/transp_v6.h> +#include <linux/proc_fs.h> #include <net/ping.h> /* Compatibility glue so we can support IPv6 when it's compiled as a module */ @@ -215,26 +216,24 @@ static int ping_v6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct ping_seq_afinfo ping_v6_seq_afinfo = { - .name = "icmp6", - .family = AF_INET6, - .seq_fops = &ping_seq_fops, - .seq_ops = { - .start = ping_v6_seq_start, - .show = ping_v6_seq_show, - .next = ping_seq_next, - .stop = ping_seq_stop, - }, +static const struct seq_operations ping_v6_seq_ops = { + .start = ping_v6_seq_start, + .show = ping_v6_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, }; static int __net_init ping_v6_proc_init_net(struct net *net) { - return ping_proc_register(net, &ping_v6_seq_afinfo); + if (!proc_create_net("icmp6", 0444, net->proc_net, &ping_v6_seq_ops, + sizeof(struct ping_iter_state))) + return -ENOMEM; + return 0; } static void __net_init ping_v6_proc_exit_net(struct net *net) { - return ping_proc_unregister(net, &ping_v6_seq_afinfo); + remove_proc_entry("icmp6", net->proc_net); } static struct pernet_operations ping_v6_net_ops = { diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index a85f7e0b14b1..2356b4af7309 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -53,18 +53,6 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) return 0; } -static int sockstat6_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, sockstat6_seq_show); -} - -static const struct file_operations sockstat6_seq_fops = { - .open = sockstat6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - static const struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), @@ -242,18 +230,6 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) return 0; } -static int snmp6_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, snmp6_seq_show); -} - -static const struct file_operations snmp6_seq_fops = { - .open = snmp6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - static int snmp6_dev_seq_show(struct seq_file *seq, void *v) { struct inet6_dev *idev = (struct inet6_dev *)seq->private; @@ -267,18 +243,6 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) return 0; } -static int snmp6_dev_seq_open(struct inode *inode, struct file *file) -{ - return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations snmp6_dev_seq_fops = { - .open = snmp6_dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - int snmp6_register_dev(struct inet6_dev *idev) { struct proc_dir_entry *p; @@ -291,9 +255,8 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!net->mib.proc_net_devsnmp6) return -ENOENT; - p = proc_create_data(idev->dev->name, 0444, - net->mib.proc_net_devsnmp6, - &snmp6_dev_seq_fops, idev); + p = proc_create_single_data(idev->dev->name, 0444, + net->mib.proc_net_devsnmp6, snmp6_dev_seq_show, idev); if (!p) return -ENOMEM; @@ -315,11 +278,12 @@ int snmp6_unregister_dev(struct inet6_dev *idev) static int __net_init ipv6_proc_init_net(struct net *net) { - if (!proc_create("sockstat6", 0444, net->proc_net, - &sockstat6_seq_fops)) + if (!proc_create_net_single("sockstat6", 0444, net->proc_net, + sockstat6_seq_show, NULL)) return -ENOMEM; - if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops)) + if (!proc_create_net_single("snmp6", 0444, net->proc_net, + snmp6_seq_show, NULL)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5eb9b08947ed..afc307c89d1a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1304,21 +1304,10 @@ static const struct seq_operations raw6_seq_ops = { .show = raw6_seq_show, }; -static int raw6_seq_open(struct inode *inode, struct file *file) -{ - return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops); -} - -static const struct file_operations raw6_seq_fops = { - .open = raw6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init raw6_init_net(struct net *net) { - if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops)) + if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops, + sizeof(struct raw_iter_state), &raw_v6_hashinfo)) return -ENOMEM; return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f4d61736c41a..a6598762d2c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4862,14 +4862,6 @@ static int ip6_route_dev_notify(struct notifier_block *this, */ #ifdef CONFIG_PROC_FS - -static const struct file_operations ipv6_route_proc_fops = { - .open = ipv6_route_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int rt6_stats_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; @@ -4884,18 +4876,6 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) return 0; } - -static int rt6_stats_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, rt6_stats_seq_show); -} - -static const struct file_operations rt6_stats_seq_fops = { - .open = rt6_stats_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL @@ -5100,8 +5080,10 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); - proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops); + proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter)); + proc_create_net_single("rt6_stats", 0444, net->proc_net, + rt6_stats_seq_show, NULL); #endif return 0; } diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 5fe139484919..bf4763fd68c2 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -103,7 +103,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdrlen = (osrh->hdrlen + 1) << 3; tot_len = hdrlen + sizeof(*hdr); - err = skb_cow_head(skb, tot_len); + err = skb_cow_head(skb, tot_len + skb->mac_len); if (unlikely(err)) return err; @@ -161,7 +161,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; - err = skb_cow_head(skb, hdrlen); + err = skb_cow_head(skb, hdrlen + skb->mac_len); if (unlikely(err)) return err; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2afce37a7177..e9400ffa7875 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1371,7 +1371,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; - dev->max_mtu = 0xFFF8 - t_hlen; + dev->max_mtu = IP6_MAX_MTU - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; @@ -1583,7 +1583,8 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (tb[IFLA_MTU]) { u32 mtu = nla_get_u32(tb[IFLA_MTU]); - if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len) + if (mtu >= IPV6_MIN_MTU && + mtu <= IP6_MAX_MTU - dev->hard_header_len) dev->mtu = mtu; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6d664d83cd16..d2ce66b23430 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1909,30 +1909,28 @@ out: return 0; } -static const struct file_operations tcp6_afinfo_seq_fops = { - .open = tcp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net +static const struct seq_operations tcp6_seq_ops = { + .show = tcp6_seq_show, + .start = tcp_seq_start, + .next = tcp_seq_next, + .stop = tcp_seq_stop, }; static struct tcp_seq_afinfo tcp6_seq_afinfo = { - .name = "tcp6", .family = AF_INET6, - .seq_fops = &tcp6_afinfo_seq_fops, - .seq_ops = { - .show = tcp6_seq_show, - }, }; int __net_init tcp6_proc_init(struct net *net) { - return tcp_proc_register(net, &tcp6_seq_afinfo); + if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, + sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) + return -ENOMEM; + return 0; } void tcp6_proc_exit(struct net *net) { - tcp_proc_unregister(net, &tcp6_seq_afinfo); + remove_proc_entry("tcp6", net->proc_net); } #endif diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ea0730028e5d..00e2112da26d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1480,31 +1480,30 @@ int udp6_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct file_operations udp6_afinfo_seq_fops = { - .open = udp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net +const struct seq_operations udp6_seq_ops = { + .start = udp_seq_start, + .next = udp_seq_next, + .stop = udp_seq_stop, + .show = udp6_seq_show, }; +EXPORT_SYMBOL(udp6_seq_ops); static struct udp_seq_afinfo udp6_seq_afinfo = { - .name = "udp6", .family = AF_INET6, .udp_table = &udp_table, - .seq_fops = &udp6_afinfo_seq_fops, - .seq_ops = { - .show = udp6_seq_show, - }, }; int __net_init udp6_proc_init(struct net *net) { - return udp_proc_register(net, &udp6_seq_afinfo); + if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops, + sizeof(struct udp_iter_state), &udp6_seq_afinfo)) + return -ENOMEM; + return 0; } void udp6_proc_exit(struct net *net) { - udp_proc_unregister(net, &udp6_seq_afinfo); + remove_proc_entry("udp6", net->proc_net); } #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 14ae32bb1f3d..5000ad6878e6 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -12,6 +12,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/export.h> +#include <linux/proc_fs.h> #include "udp_impl.h" static int udplitev6_rcv(struct sk_buff *skb) @@ -92,32 +93,23 @@ void udplitev6_exit(void) } #ifdef CONFIG_PROC_FS - -static const struct file_operations udplite6_afinfo_seq_fops = { - .open = udp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net -}; - static struct udp_seq_afinfo udplite6_seq_afinfo = { - .name = "udplite6", .family = AF_INET6, .udp_table = &udplite_table, - .seq_fops = &udplite6_afinfo_seq_fops, - .seq_ops = { - .show = udp6_seq_show, - }, }; static int __net_init udplite6_proc_init_net(struct net *net) { - return udp_proc_register(net, &udplite6_seq_afinfo); + if (!proc_create_net_data("udplite6", 0444, net->proc_net, + &udp6_seq_ops, sizeof(struct udp_iter_state), + &udplite6_seq_afinfo)) + return -ENOMEM; + return 0; } static void __net_exit udplite6_proc_exit_net(struct net *net) { - udp_proc_unregister(net, &udplite6_seq_afinfo); + remove_proc_entry("udplite6", net->proc_net); } static struct pernet_operations udplite6_net_ops = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 416fe67271a9..86dba282a147 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -126,7 +126,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; const struct ipv6hdr *hdr = ipv6_hdr(skb); - u16 offset = sizeof(*hdr); + u32 offset = sizeof(*hdr); struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u16 nhoff = IP6CB(skb)->nhoff; diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 1fac92543094..370da2f80e3c 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -15,12 +15,6 @@ #include <net/tcp.h> #ifdef CONFIG_PROC_FS -struct kcm_seq_muxinfo { - char *name; - const struct file_operations *seq_fops; - const struct seq_operations seq_ops; -}; - static struct kcm_mux *kcm_get_first(struct seq_file *seq) { struct net *net = seq_file_net(seq); @@ -86,14 +80,6 @@ struct kcm_proc_mux_state { int idx; }; -static int kcm_seq_open(struct inode *inode, struct file *file) -{ - struct kcm_seq_muxinfo *muxinfo = PDE_DATA(inode); - - return seq_open_net(inode, file, &muxinfo->seq_ops, - sizeof(struct kcm_proc_mux_state)); -} - static void kcm_format_mux_header(struct seq_file *seq) { struct net *net = seq_file_net(seq); @@ -246,44 +232,13 @@ static int kcm_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct file_operations kcm_seq_fops = { - .open = kcm_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, +static const struct seq_operations kcm_seq_ops = { + .show = kcm_seq_show, + .start = kcm_seq_start, + .next = kcm_seq_next, + .stop = kcm_seq_stop, }; -static struct kcm_seq_muxinfo kcm_seq_muxinfo = { - .name = "kcm", - .seq_fops = &kcm_seq_fops, - .seq_ops = { - .show = kcm_seq_show, - .start = kcm_seq_start, - .next = kcm_seq_next, - .stop = kcm_seq_stop, - } -}; - -static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo) -{ - struct proc_dir_entry *p; - int rc = 0; - - p = proc_create_data(muxinfo->name, 0444, net->proc_net, - muxinfo->seq_fops, muxinfo); - if (!p) - rc = -ENOMEM; - return rc; -} -EXPORT_SYMBOL(kcm_proc_register); - -static void kcm_proc_unregister(struct net *net, - struct kcm_seq_muxinfo *muxinfo) -{ - remove_proc_entry(muxinfo->name, net->proc_net); -} -EXPORT_SYMBOL(kcm_proc_unregister); - static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; @@ -390,30 +345,14 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) return 0; } -static int kcm_stats_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, kcm_stats_seq_show); -} - -static const struct file_operations kcm_stats_seq_fops = { - .open = kcm_stats_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - static int kcm_proc_init_net(struct net *net) { - int err; - - if (!proc_create("kcm_stats", 0444, net->proc_net, - &kcm_stats_seq_fops)) { - err = -ENOMEM; + if (!proc_create_net_single("kcm_stats", 0444, net->proc_net, + kcm_stats_seq_show, NULL)) goto out_kcm_stats; - } - err = kcm_proc_register(net, &kcm_seq_muxinfo); - if (err) + if (!proc_create_net("kcm", 0444, net->proc_net, &kcm_seq_ops, + sizeof(struct kcm_proc_mux_state))) goto out_kcm; return 0; @@ -421,12 +360,12 @@ static int kcm_proc_init_net(struct net *net) out_kcm: remove_proc_entry("kcm_stats", net->proc_net); out_kcm_stats: - return err; + return -ENOMEM; } static void kcm_proc_exit_net(struct net *net) { - kcm_proc_unregister(net, &kcm_seq_muxinfo); + remove_proc_entry("kcm", net->proc_net); remove_proc_entry("kcm_stats", net->proc_net); } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index dc76bc346829..d3601d421571 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1671,7 +1671,7 @@ static struct file *kcm_clone(struct socket *osock) __module_get(newsock->ops->owner); newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, - &kcm_proto, true); + &kcm_proto, false); if (!newsk) { sock_release(newsock); return ERR_PTR(-ENOMEM); diff --git a/net/key/af_key.c b/net/key/af_key.c index e62e52e8f141..5e1d2946ffbf 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3812,24 +3812,12 @@ static const struct seq_operations pfkey_seq_ops = { .show = pfkey_seq_show, }; -static int pfkey_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &pfkey_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations pfkey_proc_ops = { - .open = pfkey_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; - e = proc_create("pfkey", 0, net->proc_net, &pfkey_proc_ops); + e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops, + sizeof(struct seq_net_private)); if (e == NULL) return -ENOMEM; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1fd9e145076a..830469766c1f 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1742,24 +1742,6 @@ static const struct seq_operations pppol2tp_seq_ops = { .stop = pppol2tp_seq_stop, .show = pppol2tp_seq_show, }; - -/* Called when our /proc file is opened. We allocate data for use when - * iterating our tunnel / session contexts and store it in the private - * data of the seq_file. - */ -static int pppol2tp_proc_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &pppol2tp_seq_ops, - sizeof(struct pppol2tp_seq_data)); -} - -static const struct file_operations pppol2tp_proc_fops = { - .open = pppol2tp_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif /* CONFIG_PROC_FS */ /***************************************************************************** @@ -1771,8 +1753,8 @@ static __net_init int pppol2tp_init_net(struct net *net) struct proc_dir_entry *pde; int err = 0; - pde = proc_create("pppol2tp", 0444, net->proc_net, - &pppol2tp_proc_fops); + pde = proc_create_net("pppol2tp", 0444, net->proc_net, + &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data)); if (!pde) { err = -ENOMEM; goto out; diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 62ea0aed94b4..f3a36c16a5e7 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -214,30 +214,6 @@ static const struct seq_operations llc_seq_core_ops = { .show = llc_seq_core_show, }; -static int llc_seq_socket_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &llc_seq_socket_ops); -} - -static int llc_seq_core_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &llc_seq_core_ops); -} - -static const struct file_operations llc_seq_socket_fops = { - .open = llc_seq_socket_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations llc_seq_core_fops = { - .open = llc_seq_core_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static struct proc_dir_entry *llc_proc_dir; int __init llc_proc_init(void) @@ -249,11 +225,11 @@ int __init llc_proc_init(void) if (!llc_proc_dir) goto out; - p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops); + p = proc_create_seq("socket", 0444, llc_proc_dir, &llc_seq_socket_ops); if (!p) goto out_socket; - p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops); + p = proc_create_seq("core", 0444, llc_proc_dir, &llc_seq_core_ops); if (!p) goto out_core; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 0f6c9ca59062..5b5b0f95ffd1 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -401,7 +401,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, - struct ieee802_11_elems *elems, bool insert) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -447,7 +447,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; } - if (insert) + if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(sta); else rate_control_rate_update(local, sband, sta, changed); @@ -551,7 +551,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); sta = sta_info_get(sdata, addr); if (sta) { - mesh_sta_info_init(sdata, sta, elems, false); + mesh_sta_info_init(sdata, sta, elems); } else { rcu_read_unlock(); /* can't run atomic */ @@ -561,7 +561,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, return NULL; } - mesh_sta_info_init(sdata, sta, elems, true); + mesh_sta_info_init(sdata, sta, elems); if (sta_info_insert_rcu(sta)) return NULL; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 8d7e849d4825..41cede4041d3 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -215,7 +215,7 @@ err: static int ncsi_pkg_info_all_nl(struct sk_buff *skb, struct netlink_callback *cb) { - struct nlattr *attrs[NCSI_ATTR_MAX]; + struct nlattr *attrs[NCSI_ATTR_MAX + 1]; struct ncsi_package *np, *package; struct ncsi_dev_priv *ndp; unsigned int package_id; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0f6b8172fb9a..206fb2c4c319 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -585,7 +585,8 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max) +static void __net_init +__netfilter_net_init(struct nf_hook_entries __rcu **e, int max) { int h; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1c98c907bc63..c3db074fc1f7 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -587,25 +587,13 @@ static const struct seq_operations ip_vs_app_seq_ops = { .stop = ip_vs_app_seq_stop, .show = ip_vs_app_seq_show, }; - -static int ip_vs_app_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ip_vs_app_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ip_vs_app_fops = { - .open = ip_vs_app_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; #endif int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); - proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops); + proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, + sizeof(struct seq_net_private)); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 370abbf6f421..61c3a389da89 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -232,7 +232,10 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) { unsigned int hash; - bool ret; + bool ret = false; + + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return refcount_dec_if_one(&cp->refcnt); hash = ip_vs_conn_hashkey_conn(cp); @@ -240,15 +243,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - ret = false; /* Decrease refcnt and unlink conn only if we are last user */ if (refcount_dec_if_one(&cp->refcnt)) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } - } else - ret = refcount_read(&cp->refcnt) ? false : true; + } spin_unlock(&cp->lock); ct_write_unlock_bh(hash); @@ -454,12 +455,6 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, } EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); -static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) -{ - __ip_vs_conn_put(cp); - ip_vs_conn_expire(&cp->timer); -} - /* * Put back the conn and restart its timer with its timeout */ @@ -478,7 +473,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) (refcount_read(&cp->refcnt) == 1) && !timer_pending(&cp->timer)) /* expire connection immediately */ - __ip_vs_conn_put_notimer(cp); + ip_vs_conn_expire(&cp->timer); else __ip_vs_conn_put_timer(cp); } @@ -1136,19 +1131,6 @@ static const struct seq_operations ip_vs_conn_seq_ops = { .show = ip_vs_conn_seq_show, }; -static int ip_vs_conn_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ip_vs_conn_seq_ops, - sizeof(struct ip_vs_iter_state)); -} - -static const struct file_operations ip_vs_conn_fops = { - .open = ip_vs_conn_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static const char *ip_vs_origin_name(unsigned int flags) { if (flags & IP_VS_CONN_F_SYNC) @@ -1212,20 +1194,6 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { .stop = ip_vs_conn_seq_stop, .show = ip_vs_conn_sync_seq_show, }; - -static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, - sizeof(struct ip_vs_iter_state)); -} - -static const struct file_operations ip_vs_conn_sync_fops = { - .open = ip_vs_conn_sync_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif @@ -1385,9 +1353,11 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { atomic_set(&ipvs->conn_count, 0); - proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops); - proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net, - &ip_vs_conn_sync_fops); + proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, + &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); + proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state)); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5f6f73cf2174..0679dd101e72 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -119,6 +119,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; + local_bh_disable(); + s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.inpkts++; @@ -137,6 +139,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.inpkts++; s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } } @@ -151,6 +155,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; + local_bh_disable(); + s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.outpkts++; @@ -169,6 +175,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.outpkts++; s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } } @@ -179,6 +187,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; + local_bh_disable(); + s = this_cpu_ptr(cp->dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.conns++; @@ -193,6 +203,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) u64_stats_update_begin(&s->syncp); s->cnt.conns++; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f36098887ad0..141b1509c948 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2109,19 +2109,6 @@ static const struct seq_operations ip_vs_info_seq_ops = { .show = ip_vs_info_seq_show, }; -static int ip_vs_info_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ip_vs_info_seq_ops, - sizeof(struct ip_vs_iter)); -} - -static const struct file_operations ip_vs_info_fops = { - .open = ip_vs_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); @@ -2154,18 +2141,6 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) return 0; } -static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, ip_vs_stats_show); -} - -static const struct file_operations ip_vs_stats_fops = { - .open = ip_vs_stats_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); @@ -2221,18 +2196,6 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) return 0; } - -static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, ip_vs_stats_percpu_show); -} - -static const struct file_operations ip_vs_stats_percpu_fops = { - .open = ip_vs_stats_percpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; #endif /* @@ -2381,8 +2344,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) struct ipvs_sync_daemon_cfg cfg; memset(&cfg, 0, sizeof(cfg)); - strlcpy(cfg.mcast_ifn, dm->mcast_ifn, - sizeof(cfg.mcast_ifn)); + ret = -EINVAL; + if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, + sizeof(cfg.mcast_ifn)) <= 0) + goto out_dec; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { @@ -2420,12 +2385,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } } + if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) && + strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) == + IP_VS_SCHEDNAME_MAXLEN) { + ret = -EINVAL; + goto out_unlock; + } + /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && usvc.protocol != IPPROTO_SCTP) { - pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", + pr_err("set_ctl: invalid protocol: %d %pI4:%d\n", usvc.protocol, &usvc.addr.ip, - ntohs(usvc.port), usvc.sched_name); + ntohs(usvc.port)); ret = -EFAULT; goto out_unlock; } @@ -2847,7 +2819,7 @@ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, - .len = IP_VS_IFNAME_MAXLEN }, + .len = IP_VS_IFNAME_MAXLEN - 1 }, [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, @@ -2865,7 +2837,7 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, - .len = IP_VS_SCHEDNAME_MAXLEN }, + .len = IP_VS_SCHEDNAME_MAXLEN - 1 }, [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, .len = IP_VS_PENAME_MAXLEN }, [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, @@ -4030,10 +4002,12 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) spin_lock_init(&ipvs->tot_stats.lock); - proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops); - proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops); - proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net, - &ip_vs_stats_percpu_fops); + proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops, + sizeof(struct ip_vs_iter)); + proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, + ip_vs_stats_show, NULL); + proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net, + ip_vs_stats_percpu_show, NULL); if (ip_vs_control_net_init_sysctl(ipvs)) goto err; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4b2b3d53acfc..853b23206bb7 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -644,19 +644,6 @@ static const struct seq_operations exp_seq_ops = { .stop = exp_seq_stop, .show = exp_seq_show }; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); -} - -static const struct file_operations exp_file_ops = { - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; #endif /* CONFIG_NF_CONNTRACK_PROCFS */ static int exp_proc_init(struct net *net) @@ -666,8 +653,8 @@ static int exp_proc_init(struct net *net) kuid_t root_uid; kgid_t root_gid; - proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, - &exp_file_ops); + proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net, + &exp_seq_ops, sizeof(struct ct_expect_iter_state)); if (!proc) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e97cdc1cf98c..8e67910185a0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -981,6 +981,17 @@ static int tcp_packet(struct nf_conn *ct, return NF_ACCEPT; /* Don't change state */ } break; + case TCP_CONNTRACK_SYN_SENT2: + /* tcp_conntracks table is not smart enough to handle + * simultaneous open. + */ + ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; + break; + case TCP_CONNTRACK_SYN_RECV: + if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && + ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) + new_state = TCP_CONNTRACK_ESTABLISHED; + break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 037fec54c850..b642c0b2495c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -375,19 +375,6 @@ static const struct seq_operations ct_seq_ops = { .show = ct_seq_show }; -static int ct_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_seq_ops, - sizeof(struct ct_iter_state)); -} - -static const struct file_operations ct_file_ops = { - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); @@ -467,26 +454,14 @@ static const struct seq_operations ct_cpu_seq_ops = { .show = ct_cpu_seq_show, }; -static int ct_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_cpu_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ct_cpu_seq_fops = { - .open = ct_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; kuid_t root_uid; kgid_t root_gid; - pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); + pde = proc_create_net("nf_conntrack", 0440, net->proc_net, &ct_seq_ops, + sizeof(struct ct_iter_state)); if (!pde) goto out_nf_conntrack; @@ -495,8 +470,8 @@ static int nf_conntrack_standalone_init_proc(struct net *net) if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(pde, root_uid, root_gid); - pde = proc_create("nf_conntrack", 0444, net->proc_net_stat, - &ct_cpu_seq_fops); + pde = proc_create_net("nf_conntrack", 0444, net->proc_net_stat, + &ct_cpu_seq_ops, sizeof(struct seq_net_private)); if (!pde) goto out_stat_nf_conntrack; return 0; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 6d0357817cda..426457047578 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -394,21 +394,6 @@ static const struct seq_operations nflog_seq_ops = { .stop = seq_stop, .show = seq_show, }; - -static int nflog_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nflog_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations nflog_file_ops = { - .open = nflog_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - #endif /* PROC_FS */ #ifdef CONFIG_SYSCTL @@ -549,8 +534,8 @@ static int __net_init nf_log_net_init(struct net *net) int ret = -ENOMEM; #ifdef CONFIG_PROC_FS - if (!proc_create("nf_log", 0444, - net->nf.proc_netfilter, &nflog_file_ops)) + if (!proc_create_net("nf_log", 0444, net->nf.proc_netfilter, + &nflog_seq_ops, sizeof(struct seq_net_private))) return ret; #endif ret = netfilter_log_sysctl_init(net); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 6039b350abbe..8ff4d22f10b2 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -310,23 +310,10 @@ static const struct seq_operations synproxy_cpu_seq_ops = { .show = synproxy_cpu_seq_show, }; -static int synproxy_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &synproxy_cpu_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations synproxy_cpu_seq_fops = { - .open = synproxy_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init synproxy_proc_init(struct net *net) { - if (!proc_create("synproxy", 0444, net->proc_net_stat, - &synproxy_cpu_seq_fops)) + if (!proc_create_net("synproxy", 0444, net->proc_net_stat, + &synproxy_cpu_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 04d4e3772584..501e48a7965b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -214,6 +214,34 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } +static void nft_rule_expr_activate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->activate) + expr->ops->activate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + +static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->deactivate) + expr->ops->deactivate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { @@ -259,6 +287,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } + nft_rule_expr_deactivate(ctx, rule); return 0; } @@ -1269,8 +1298,10 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); free_percpu(oldstats); - } else + } else { rcu_assign_pointer(chain->stats, newstats); + static_branch_inc(&nft_counters_enabled); + } } static void nf_tables_chain_destroy(struct nft_ctx *ctx) @@ -2238,6 +2269,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } +static void nf_tables_rule_release(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + nft_rule_expr_deactivate(ctx, rule); + nf_tables_rule_destroy(ctx, rule); +} + #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -2402,7 +2440,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err2: - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) @@ -4044,8 +4082,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ - nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) - return -EBUSY; + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) { + err = -EBUSY; + goto err5; + } if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), @@ -4130,7 +4170,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, * NFT_GOTO verdicts. This function must be called on active data objects * from the second phase of the commit protocol. */ -static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -4668,7 +4708,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table[0] && + if (filter && filter->table && strcmp(filter->table, table->name)) goto cont; if (filter && @@ -5342,7 +5382,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table[0] && + if (filter && filter->table && strcmp(filter->table, table->name)) goto cont; @@ -5761,7 +5801,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -static void nf_tables_commit_release(struct nft_trans *trans) +static void nft_commit_release(struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_DELTABLE: @@ -5790,6 +5830,21 @@ static void nf_tables_commit_release(struct nft_trans *trans) kfree(trans); } +static void nf_tables_commit_release(struct net *net) +{ + struct nft_trans *trans, *next; + + if (list_empty(&net->nft.commit_list)) + return; + + synchronize_rcu(); + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_del(&trans->list); + nft_commit_release(trans); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -5920,13 +5975,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } - synchronize_rcu(); - - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - list_del(&trans->list); - nf_tables_commit_release(trans); - } - + nf_tables_commit_release(net); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); return 0; @@ -6006,10 +6055,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; nft_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: @@ -6585,7 +6636,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); ctx->chain->use--; - nf_tables_rule_destroy(ctx, rule); + nf_tables_rule_release(ctx, rule); } list_del(&ctx->chain->list); ctx->table->use--; @@ -6623,7 +6674,7 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); chain->use--; - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); } } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index dfd0bf3810d2..40e744572283 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -119,14 +119,21 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { + struct nft_base_chain *base_chain; struct nft_stats *stats; + base_chain = nft_base_chain(chain); + if (!base_chain->stats) + return; + local_bh_disable(); - stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats)); - u64_stats_update_begin(&stats->syncp); - stats->pkts++; - stats->bytes += pkt->skb->len; - u64_stats_update_end(&stats->syncp); + stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); + if (stats) { + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + } local_bh_enable(); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index b9505bcd3827..a0e5adf0b3b6 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, nfacct->flags = flags; } - strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + nla_strlcpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 4a4b293fb2e5..cb5b5f207777 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -149,8 +149,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; - strncpy(expect_policy->name, - nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); + nla_strlcpy(expect_policy->name, + tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) @@ -234,7 +234,8 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err1; - strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); + nla_strlcpy(helper->name, + tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > FIELD_SIZEOF(struct nf_conn_help, data)) { ret = -ENOMEM; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7b46aa4c478d..c14822b9729f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1046,20 +1046,6 @@ static const struct seq_operations nful_seq_ops = { .stop = seq_stop, .show = seq_show, }; - -static int nful_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nful_seq_ops, - sizeof(struct iter_state)); -} - -static const struct file_operations nful_file_ops = { - .open = nful_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif /* PROC_FS */ static int __net_init nfnl_log_net_init(struct net *net) @@ -1077,8 +1063,8 @@ static int __net_init nfnl_log_net_init(struct net *net) spin_lock_init(&log->instances_lock); #ifdef CONFIG_PROC_FS - proc = proc_create("nfnetlink_log", 0440, - net->nf.proc_netfilter, &nful_file_ops); + proc = proc_create_net("nfnetlink_log", 0440, net->nf.proc_netfilter, + &nful_seq_ops, sizeof(struct iter_state)); if (!proc) return -ENOMEM; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 74a04638ef03..494a9ab35cb6 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1469,20 +1469,6 @@ static const struct seq_operations nfqnl_seq_ops = { .stop = seq_stop, .show = seq_show, }; - -static int nfqnl_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nfqnl_seq_ops, - sizeof(struct iter_state)); -} - -static const struct file_operations nfqnl_file_ops = { - .open = nfqnl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif /* PROC_FS */ static int __net_init nfnl_queue_net_init(struct net *net) @@ -1496,8 +1482,8 @@ static int __net_init nfnl_queue_net_init(struct net *net) spin_lock_init(&q->instances_lock); #ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_queue", 0440, - net->nf.proc_netfilter, &nfqnl_file_ops)) + if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter, + &nfqnl_seq_ops, sizeof(struct iter_state))) return -ENOMEM; #endif nf_register_queue_handler(net, &nfqh); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8e23726b9081..1d99a1efdafc 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,14 +27,31 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; unsigned int refcnt; + + /* Unlike other expressions, ops doesn't have static storage duration. + * nft core assumes they do. We use kfree_rcu so that nft core can + * can check expr->ops->size even after nft_compat->destroy() frees + * the nft_xt struct that holds the ops structure. + */ + struct rcu_head rcu_head; +}; + +/* Used for matches where *info is larger than X byte */ +#define NFT_MATCH_LARGE_THRESH 192 + +struct nft_xt_match_priv { + void *info; }; -static void nft_xt_put(struct nft_xt *xt) +static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { list_del(&xt->head); - kfree(xt); + kfree_rcu(xt, rcu_head); + return true; } + + return false; } static int nft_compat_chain_validate_dependency(const char *tablename, @@ -226,6 +243,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -236,25 +254,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; /* The standard target cannot be used */ - if (target->target == NULL) { - ret = -EINVAL; - goto err; - } + if (!target->target) + return -EINVAL; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(target->me); - return ret; } static void @@ -271,8 +286,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(target->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(target->me); } static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -316,11 +331,11 @@ static int nft_target_validate(const struct nft_ctx *ctx, return 0; } -static void nft_match_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void __nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct sk_buff *skb = pkt->skb; bool ret; @@ -344,6 +359,22 @@ static void nft_match_eval(const struct nft_expr *expr, } } +static void nft_match_large_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_eval(expr, regs, pkt, priv->info); +} + +static void nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr)); +} + static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, [NFTA_MATCH_REV] = { .type = NLA_U32 }, @@ -404,13 +435,14 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out) } static int -nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[], + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -421,26 +453,50 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); ret = xt_check_match(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(match->me); +} + +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr)); +} + +static int +nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + struct xt_match *m = expr->ops->data; + int ret; + + priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL); + if (!priv->info) + return -ENOMEM; + + ret = __nft_match_init(ctx, expr, tb, priv->info); + if (ret) + kfree(priv->info); return ret; } static void -nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, + void *info) { struct xt_match *match = expr->ops->data; - void *info = nft_expr_priv(expr); struct xt_mtdtor_param par; par.net = ctx->net; @@ -450,13 +506,28 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.match->destroy != NULL) par.match->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(match->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(match->me); } -static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void +nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); +} + +static void +nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_destroy(ctx, expr, priv->info); + kfree(priv->info); +} + +static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || @@ -470,6 +541,18 @@ nla_put_failure: return -1; } +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + return __nft_match_dump(skb, expr, nft_expr_priv(expr)); +} + +static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(e); + + return __nft_match_dump(skb, e, priv->info); +} + static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -637,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, { struct nft_xt *nft_match; struct xt_match *match; + unsigned int matchsize; char *mt_name; u32 rev, family; int err; @@ -654,13 +738,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (nft_match_cmp(match, mt_name, rev, family)) { - if (!try_module_get(match->me)) - return ERR_PTR(-ENOENT); - - nft_match->refcnt++; + if (nft_match_cmp(match, mt_name, rev, family)) return &nft_match->ops; - } } match = xt_request_find_match(family, mt_name, rev); @@ -679,9 +758,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - nft_match->refcnt = 1; + nft_match->refcnt = 0; nft_match->ops.type = &nft_match_type; - nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; @@ -689,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; + matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); + if (matchsize > NFT_MATCH_LARGE_THRESH) { + matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv)); + + nft_match->ops.eval = nft_match_large_eval; + nft_match->ops.init = nft_match_large_init; + nft_match->ops.destroy = nft_match_large_destroy; + nft_match->ops.dump = nft_match_large_dump; + } + + nft_match->ops.size = matchsize; + list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; @@ -739,13 +829,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (nft_target_cmp(target, tg_name, rev, family)) { - if (!try_module_get(target->me)) - return ERR_PTR(-ENOENT); - - nft_target->refcnt++; + if (nft_target_cmp(target, tg_name, rev, family)) return &nft_target->ops; - } } target = xt_request_find_target(family, tg_name, rev); @@ -764,7 +849,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - nft_target->refcnt = 1; + nft_target->refcnt = 0; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -823,6 +908,32 @@ err_match: static void __exit nft_compat_module_exit(void) { + struct nft_xt *xt, *next; + + /* list should be empty here, it can be non-empty only in case there + * was an error that caused nft_xt expr to not be initialized fully + * and noone else requested the same expression later. + * + * In this case, the lists contain 0-refcount entries that still + * hold module reference. + */ + list_for_each_entry_safe(xt, next, &nft_target_list, head) { + struct xt_target *target = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(target->me); + kfree(xt); + } + + list_for_each_entry_safe(xt, next, &nft_match_list, head) { + struct xt_match *match = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(match->me); + kfree(xt); + } nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index ea737fd789e8..5c0de704bad5 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -880,22 +880,26 @@ static int nft_ct_helper_obj_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { const struct nft_ct_helper_obj *priv = nft_obj_data(obj); - const struct nf_conntrack_helper *helper = priv->helper4; + const struct nf_conntrack_helper *helper; u16 family; + if (priv->helper4 && priv->helper6) { + family = NFPROTO_INET; + helper = priv->helper4; + } else if (priv->helper6) { + family = NFPROTO_IPV6; + helper = priv->helper6; + } else { + family = NFPROTO_IPV4; + helper = priv->helper4; + } + if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name)) return -1; if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto)) return -1; - if (priv->helper4 && priv->helper6) - family = NFPROTO_INET; - else if (priv->helper6) - family = NFPROTO_IPV6; - else - family = NFPROTO_IPV4; - if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family))) return -1; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 4717d7796927..aa87ff8beae8 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -69,8 +69,16 @@ err1: return err; } -static void nft_immediate_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_immediate_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static void nft_immediate_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); @@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), .eval = nft_immediate_eval, .init = nft_immediate_init, - .destroy = nft_immediate_destroy, + .activate = nft_immediate_activate, + .deactivate = nft_immediate_deactivate, .dump = nft_immediate_dump, .validate = nft_immediate_validate, }; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index a9fc298ef4c3..72f13a1144dd 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -51,10 +51,13 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) return !limit->invert; } +/* Use same default as in iptables. */ +#define NFT_LIMIT_PKT_BURST_DEFAULT 5 + static int nft_limit_init(struct nft_limit *limit, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], bool pkts) { - u64 unit; + u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) @@ -68,18 +71,25 @@ static int nft_limit_init(struct nft_limit *limit, if (tb[NFTA_LIMIT_BURST]) limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); - else - limit->burst = 0; + + if (pkts && limit->burst == 0) + limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT; if (limit->rate + limit->burst < limit->rate) return -EOVERFLOW; - /* The token bucket size limits the number of tokens can be - * accumulated. tokens_max specifies the bucket size. - * tokens_max = unit * (rate + burst) / rate. - */ - limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), - limit->rate); + if (pkts) { + tokens = div_u64(limit->nsecs, limit->rate) * limit->burst; + } else { + /* The token bucket size limits the number of tokens can be + * accumulated. tokens_max specifies the bucket size. + * tokens_max = unit * (rate + burst) / rate. + */ + tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + limit->rate); + } + + limit->tokens = tokens; limit->tokens_max = limit->tokens; if (tb[NFTA_LIMIT_FLAGS]) { @@ -144,7 +154,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx, struct nft_limit_pkts *priv = nft_expr_priv(expr); int err; - err = nft_limit_init(&priv->limit, tb); + err = nft_limit_init(&priv->limit, tb, true); if (err < 0) return err; @@ -185,7 +195,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx, { struct nft_limit *priv = nft_expr_priv(expr); - return nft_limit_init(priv, tb); + return nft_limit_init(priv, tb, false); } static int nft_limit_bytes_dump(struct sk_buff *skb, @@ -246,7 +256,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx, struct nft_limit_pkts *priv = nft_obj_data(obj); int err; - err = nft_limit_init(&priv->limit, tb); + err = nft_limit_init(&priv->limit, tb, true); if (err < 0) return err; @@ -289,7 +299,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx, { struct nft_limit *priv = nft_obj_data(obj); - return nft_limit_init(priv, tb); + return nft_limit_init(priv, tb, false); } static int nft_limit_obj_bytes_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8fb91940e2e7..204af9899482 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -234,7 +234,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; u32 *sreg = ®s->data[meta->sreg]; u32 value = *sreg; - u8 pkt_type; + u8 value8; switch (meta->key) { case NFT_META_MARK: @@ -244,15 +244,17 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - pkt_type = nft_reg_load8(sreg); + value8 = nft_reg_load8(sreg); - if (skb->pkt_type != pkt_type && - skb_pkt_type_ok(pkt_type) && + if (skb->pkt_type != value8 && + skb_pkt_type_ok(value8) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = pkt_type; + skb->pkt_type = value8; break; case NFT_META_NFTRACE: - skb->nf_trace = !!value; + value8 = nft_reg_load8(sreg); + + skb->nf_trace = !!value8; break; default: WARN_ON(1); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 71325fef647d..55cb4d197184 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -183,6 +183,9 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) struct xt_match *m; int err = -ENOENT; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { @@ -229,6 +232,9 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) struct xt_target *t; int err = -ENOENT; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { @@ -1489,15 +1495,10 @@ void *xt_unregister_table(struct xt_table *table) EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS -struct xt_names_priv { - struct seq_net_private p; - u_int8_t af; -}; static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { - struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - u_int8_t af = priv->af; + u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file)); mutex_lock(&xt[af].mutex); return seq_list_start(&net->xt.tables[af], *pos); @@ -1505,17 +1506,15 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - u_int8_t af = priv->af; + u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file)); return seq_list_next(v, &net->xt.tables[af], pos); } static void xt_table_seq_stop(struct seq_file *seq, void *v) { - struct xt_names_priv *priv = seq->private; - u_int8_t af = priv->af; + u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file)); mutex_unlock(&xt[af].mutex); } @@ -1536,34 +1535,13 @@ static const struct seq_operations xt_table_seq_ops = { .show = xt_table_seq_show, }; -static int xt_table_open(struct inode *inode, struct file *file) -{ - int ret; - struct xt_names_priv *priv; - - ret = seq_open_net(inode, file, &xt_table_seq_ops, - sizeof(struct xt_names_priv)); - if (!ret) { - priv = ((struct seq_file *)file->private_data)->private; - priv->af = (unsigned long)PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations xt_table_ops = { - .open = xt_table_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - /* * Traverse state for ip{,6}_{tables,matches} for helping crossing * the multi-AF mutexes. */ struct nf_mttg_trav { struct list_head *head, *curr; - uint8_t class, nfproto; + uint8_t class; }; enum { @@ -1580,6 +1558,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, [MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC, [MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE, }; + uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; switch (trav->class) { @@ -1594,9 +1573,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, if (trav->curr != trav->head) break; mutex_unlock(&xt[NFPROTO_UNSPEC].mutex); - mutex_lock(&xt[trav->nfproto].mutex); + mutex_lock(&xt[nfproto].mutex); trav->head = trav->curr = is_target ? - &xt[trav->nfproto].target : &xt[trav->nfproto].match; + &xt[nfproto].target : &xt[nfproto].match; trav->class = next_class[trav->class]; break; case MTTG_TRAV_NFP_SPEC: @@ -1628,6 +1607,7 @@ static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos, static void xt_mttg_seq_stop(struct seq_file *seq, void *v) { + uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; switch (trav->class) { @@ -1635,7 +1615,7 @@ static void xt_mttg_seq_stop(struct seq_file *seq, void *v) mutex_unlock(&xt[NFPROTO_UNSPEC].mutex); break; case MTTG_TRAV_NFP_SPEC: - mutex_unlock(&xt[trav->nfproto].mutex); + mutex_unlock(&xt[nfproto].mutex); break; } } @@ -1674,24 +1654,6 @@ static const struct seq_operations xt_match_seq_ops = { .show = xt_match_seq_show, }; -static int xt_match_open(struct inode *inode, struct file *file) -{ - struct nf_mttg_trav *trav; - trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav)); - if (!trav) - return -ENOMEM; - - trav->nfproto = (unsigned long)PDE_DATA(inode); - return 0; -} - -static const struct file_operations xt_match_ops = { - .open = xt_match_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos) { return xt_mttg_seq_start(seq, pos, true); @@ -1726,24 +1688,6 @@ static const struct seq_operations xt_target_seq_ops = { .show = xt_target_seq_show, }; -static int xt_target_open(struct inode *inode, struct file *file) -{ - struct nf_mttg_trav *trav; - trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav)); - if (!trav) - return -ENOMEM; - - trav->nfproto = (unsigned long)PDE_DATA(inode); - return 0; -} - -static const struct file_operations xt_target_ops = { - .open = xt_target_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - #define FORMAT_TABLES "_tables_names" #define FORMAT_MATCHES "_tables_matches" #define FORMAT_TARGETS "_tables_targets" @@ -1807,8 +1751,9 @@ int xt_proto_init(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops, - (void *)(unsigned long)af); + proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops, + sizeof(struct seq_net_private), + (void *)(unsigned long)af); if (!proc) goto out; if (uid_valid(root_uid) && gid_valid(root_gid)) @@ -1816,8 +1761,9 @@ int xt_proto_init(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops, - (void *)(unsigned long)af); + proc = proc_create_seq_private(buf, 0440, net->proc_net, + &xt_match_seq_ops, sizeof(struct nf_mttg_trav), + (void *)(unsigned long)af); if (!proc) goto out_remove_tables; if (uid_valid(root_uid) && gid_valid(root_gid)) @@ -1825,8 +1771,9 @@ int xt_proto_init(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops, - (void *)(unsigned long)af); + proc = proc_create_seq_private(buf, 0440, net->proc_net, + &xt_target_seq_ops, sizeof(struct nf_mttg_trav), + (void *)(unsigned long)af); if (!proc) goto out_remove_matches; if (uid_valid(root_uid) && gid_valid(root_gid)) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 0cd73567e7ff..9b16402f29af 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -57,9 +57,9 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) } /* need to declare this at the top */ -static const struct file_operations dl_file_ops_v2; -static const struct file_operations dl_file_ops_v1; -static const struct file_operations dl_file_ops; +static const struct seq_operations dl_seq_ops_v2; +static const struct seq_operations dl_seq_ops_v1; +static const struct seq_operations dl_seq_ops; /* hash table crap */ struct dsthash_dst { @@ -272,7 +272,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - const struct file_operations *fops; + const struct seq_operations *ops; unsigned int size, i; int ret; @@ -321,19 +321,19 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, switch (revision) { case 1: - fops = &dl_file_ops_v1; + ops = &dl_seq_ops_v1; break; case 2: - fops = &dl_file_ops_v2; + ops = &dl_seq_ops_v2; break; default: - fops = &dl_file_ops; + ops = &dl_seq_ops; } - hinfo->pde = proc_create_data(name, 0, + hinfo->pde = proc_create_seq_data(name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - fops, hinfo); + ops, hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -1057,7 +1057,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { static void *dl_seq_start(struct seq_file *s, loff_t *pos) __acquires(htable->lock) { - struct xt_hashlimit_htable *htable = s->private; + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); unsigned int *bucket; spin_lock_bh(&htable->lock); @@ -1074,7 +1074,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct xt_hashlimit_htable *htable = s->private; + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); unsigned int *bucket = v; *pos = ++(*bucket); @@ -1088,7 +1088,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { - struct xt_hashlimit_htable *htable = s->private; + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); unsigned int *bucket = v; if (!IS_ERR(bucket)) @@ -1130,7 +1130,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; + struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1145,7 +1145,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; + struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1160,7 +1160,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; + struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1174,7 +1174,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show_v2(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = s->private; + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; @@ -1188,7 +1188,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v) static int dl_seq_show_v1(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = s->private; + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); unsigned int *bucket = v; struct dsthash_ent *ent; @@ -1202,7 +1202,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) static int dl_seq_show(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = s->private; + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); unsigned int *bucket = v; struct dsthash_ent *ent; @@ -1235,62 +1235,6 @@ static const struct seq_operations dl_seq_ops = { .show = dl_seq_show }; -static int dl_proc_open_v2(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &dl_seq_ops_v2); - - if (!ret) { - struct seq_file *sf = file->private_data; - - sf->private = PDE_DATA(inode); - } - return ret; -} - -static int dl_proc_open_v1(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &dl_seq_ops_v1); - - if (!ret) { - struct seq_file *sf = file->private_data; - sf->private = PDE_DATA(inode); - } - return ret; -} - -static int dl_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &dl_seq_ops); - - if (!ret) { - struct seq_file *sf = file->private_data; - - sf->private = PDE_DATA(inode); - } - return ret; -} - -static const struct file_operations dl_file_ops_v2 = { - .open = dl_proc_open_v2, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static const struct file_operations dl_file_ops_v1 = { - .open = dl_proc_open_v1, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static const struct file_operations dl_file_ops = { - .open = dl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static int __net_init hashlimit_proc_net_init(struct net *net) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2e2dd88fc79f..393573a99a5a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2635,21 +2635,6 @@ static const struct seq_operations netlink_seq_ops = { .stop = netlink_seq_stop, .show = netlink_seq_show, }; - - -static int netlink_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &netlink_seq_ops, - sizeof(struct nl_seq_iter)); -} - -static const struct file_operations netlink_seq_fops = { - .open = netlink_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif int netlink_register_notifier(struct notifier_block *nb) @@ -2694,7 +2679,8 @@ static const struct net_proto_family netlink_family_ops = { static int __net_init netlink_net_init(struct net *net) { #ifdef CONFIG_PROC_FS - if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops)) + if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops, + sizeof(struct nl_seq_iter))) return -ENOMEM; #endif return 0; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4221d98a314b..c2888c78d4c1 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1338,18 +1338,6 @@ static const struct seq_operations nr_info_seqops = { .stop = nr_info_stop, .show = nr_info_show, }; - -static int nr_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &nr_info_seqops); -} - -static const struct file_operations nr_info_fops = { - .open = nr_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif /* CONFIG_PROC_FS */ static const struct net_proto_family nr_family_ops = { @@ -1450,9 +1438,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_create("nr", 0444, init_net.proc_net, &nr_info_fops); - proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops); - proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops); + proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); + proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); + proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); out: return rc; fail: diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b5a7dcb30991..6485f593e2f0 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -888,25 +888,13 @@ static int nr_node_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations nr_node_seqops = { +const struct seq_operations nr_node_seqops = { .start = nr_node_start, .next = nr_node_next, .stop = nr_node_stop, .show = nr_node_show, }; -static int nr_node_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &nr_node_seqops); -} - -const struct file_operations nr_nodes_fops = { - .open = nr_node_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static void *nr_neigh_start(struct seq_file *seq, loff_t *pos) { spin_lock_bh(&nr_neigh_list_lock); @@ -954,25 +942,12 @@ static int nr_neigh_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations nr_neigh_seqops = { +const struct seq_operations nr_neigh_seqops = { .start = nr_neigh_start, .next = nr_neigh_next, .stop = nr_neigh_stop, .show = nr_neigh_show, }; - -static int nr_neigh_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &nr_neigh_seqops); -} - -const struct file_operations nr_neigh_fops = { - .open = nr_neigh_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif /* diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 01f3515cada0..f9cdd27a7f6f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2903,13 +2903,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (skb == NULL) goto out_unlock; - skb_set_network_header(skb, reserve); + skb_reset_network_header(skb); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; + } else if (reserve) { + skb_reserve(skb, -reserve); } /* Returns -EFAULT on error */ @@ -4554,20 +4556,6 @@ static const struct seq_operations packet_seq_ops = { .stop = packet_seq_stop, .show = packet_seq_show, }; - -static int packet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &packet_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations packet_seq_fops = { - .open = packet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif static int __net_init packet_net_init(struct net *net) @@ -4575,7 +4563,8 @@ static int __net_init packet_net_init(struct net *net) mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); - if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops)) + if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, + sizeof(struct seq_net_private))) return -ENOMEM; return 0; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 77787512fc32..6cb4f602ab71 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -320,7 +320,8 @@ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); - if (!proc_create("phonet", 0, net->proc_net, &pn_sock_seq_fops)) + if (!proc_create_net("phonet", 0, net->proc_net, &pn_sock_seq_ops, + sizeof(struct seq_net_private))) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); @@ -351,7 +352,8 @@ int __init phonet_device_init(void) if (err) return err; - proc_create("pnresource", 0, init_net.proc_net, &pn_res_seq_fops); + proc_create_net("pnresource", 0, init_net.proc_net, &pn_res_seq_ops, + sizeof(struct seq_net_private)); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index f9b40e6a18a5..30187990257f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -620,25 +620,12 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations pn_sock_seq_ops = { +const struct seq_operations pn_sock_seq_ops = { .start = pn_sock_seq_start, .next = pn_sock_seq_next, .stop = pn_sock_seq_stop, .show = pn_sock_seq_show, }; - -static int pn_sock_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &pn_sock_seq_ops, - sizeof(struct seq_net_private)); -} - -const struct file_operations pn_sock_seq_fops = { - .open = pn_sock_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; #endif static struct { @@ -802,23 +789,10 @@ static int pn_res_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations pn_res_seq_ops = { +const struct seq_operations pn_res_seq_ops = { .start = pn_res_seq_start, .next = pn_res_seq_next, .stop = pn_res_seq_stop, .show = pn_res_seq_show, }; - -static int pn_res_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &pn_res_seq_ops, - sizeof(struct seq_net_private)); -} - -const struct file_operations pn_res_seq_fops = { - .open = pn_res_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; #endif diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9ff5e0a76593..22a7f2b413ac 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1453,18 +1453,6 @@ static const struct seq_operations rose_info_seqops = { .stop = rose_info_stop, .show = rose_info_show, }; - -static int rose_info_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &rose_info_seqops); -} - -static const struct file_operations rose_info_fops = { - .open = rose_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; #endif /* CONFIG_PROC_FS */ static const struct net_proto_family rose_family_ops = { @@ -1567,13 +1555,13 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_create("rose", 0444, init_net.proc_net, &rose_info_fops); - proc_create("rose_neigh", 0444, init_net.proc_net, - &rose_neigh_fops); - proc_create("rose_nodes", 0444, init_net.proc_net, - &rose_nodes_fops); - proc_create("rose_routes", 0444, init_net.proc_net, - &rose_routes_fops); + proc_create_seq("rose", 0444, init_net.proc_net, &rose_info_seqops); + proc_create_seq("rose_neigh", 0444, init_net.proc_net, + &rose_neigh_seqops); + proc_create_seq("rose_nodes", 0444, init_net.proc_net, + &rose_node_seqops); + proc_create_seq("rose_routes", 0444, init_net.proc_net, + &rose_route_seqops); out: return rc; fail: diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 178619ddab68..77e9f85a2c92 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1143,25 +1143,13 @@ static int rose_node_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations rose_node_seqops = { +const struct seq_operations rose_node_seqops = { .start = rose_node_start, .next = rose_node_next, .stop = rose_node_stop, .show = rose_node_show, }; -static int rose_nodes_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &rose_node_seqops); -} - -const struct file_operations rose_nodes_fops = { - .open = rose_nodes_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static void *rose_neigh_start(struct seq_file *seq, loff_t *pos) __acquires(rose_neigh_list_lock) { @@ -1226,26 +1214,13 @@ static int rose_neigh_show(struct seq_file *seq, void *v) } -static const struct seq_operations rose_neigh_seqops = { +const struct seq_operations rose_neigh_seqops = { .start = rose_neigh_start, .next = rose_neigh_next, .stop = rose_neigh_stop, .show = rose_neigh_show, }; -static int rose_neigh_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &rose_neigh_seqops); -} - -const struct file_operations rose_neigh_fops = { - .open = rose_neigh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - - static void *rose_route_start(struct seq_file *seq, loff_t *pos) __acquires(rose_route_list_lock) { @@ -1311,25 +1286,12 @@ static int rose_route_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations rose_route_seqops = { +struct seq_operations rose_route_seqops = { .start = rose_route_start, .next = rose_route_next, .stop = rose_route_stop, .show = rose_route_show, }; - -static int rose_route_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &rose_route_seqops); -} - -const struct file_operations rose_routes_fops = { - .open = rose_route_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - #endif /* CONFIG_PROC_FS */ /* diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 19975d2ca9a2..29923ec2189c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1051,8 +1051,8 @@ void __rxrpc_queue_peer_error(struct rxrpc_peer *); /* * proc.c */ -extern const struct file_operations rxrpc_call_seq_fops; -extern const struct file_operations rxrpc_connection_seq_fops; +extern const struct seq_operations rxrpc_call_seq_ops; +extern const struct seq_operations rxrpc_connection_seq_ops; /* * recvmsg.c diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index c7a023fb22d0..5d6a773db973 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -97,8 +97,11 @@ static __net_init int rxrpc_init_net(struct net *net) if (!rxnet->proc_net) goto err_proc; - proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops); - proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops); + proc_create_net("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_ops, + sizeof(struct seq_net_private)); + proc_create_net("conns", 0444, rxnet->proc_net, + &rxrpc_connection_seq_ops, + sizeof(struct seq_net_private)); return 0; err_proc: diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 7e45db058823..d9fca8c4bcdc 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -115,26 +115,13 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) return 0; } -static const struct seq_operations rxrpc_call_seq_ops = { +const struct seq_operations rxrpc_call_seq_ops = { .start = rxrpc_call_seq_start, .next = rxrpc_call_seq_next, .stop = rxrpc_call_seq_stop, .show = rxrpc_call_seq_show, }; -static int rxrpc_call_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &rxrpc_call_seq_ops, - sizeof(struct seq_net_private)); -} - -const struct file_operations rxrpc_call_seq_fops = { - .open = rxrpc_call_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * generate a list of extant virtual connections in /proc/net/rxrpc_conns */ @@ -207,23 +194,9 @@ print: return 0; } -static const struct seq_operations rxrpc_connection_seq_ops = { +const struct seq_operations rxrpc_connection_seq_ops = { .start = rxrpc_connection_seq_start, .next = rxrpc_connection_seq_next, .stop = rxrpc_connection_seq_stop, .show = rxrpc_connection_seq_show, }; - - -static int rxrpc_connection_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &rxrpc_connection_seq_ops, - sizeof(struct seq_net_private)); -} - -const struct file_operations rxrpc_connection_seq_fops = { - .open = rxrpc_connection_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 853604685965..1fb39e1f9d07 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case htons(ETH_P_8021AD): break; default: + if (exists) + tcf_idr_release(*a, bind); return -EPROTONOSUPPORT; } } else { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 963e4bf0aab8..a57e112d9b3e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1588,7 +1588,7 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, return ret; ok_count = ret; - if (!exts) + if (!exts || ok_count) return ok_count; ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); if (ret < 0) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d964e60c730e..c79f6e71512e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -977,7 +977,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, return 0; errout_idr: - if (fnew->handle) + if (!fold) idr_remove(&head->handle_idr, fnew->handle); errout: tcf_exts_destroy(&fnew->exts); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 106dae7e4818..54eca685420f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2092,23 +2092,11 @@ static int psched_show(struct seq_file *seq, void *v) return 0; } -static int psched_open(struct inode *inode, struct file *file) -{ - return single_open(file, psched_show, NULL); -} - -static const struct file_operations psched_fops = { - .open = psched_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __net_init psched_net_init(struct net *net) { struct proc_dir_entry *e; - e = proc_create("psched", 0, net->proc_net, &psched_fops); + e = proc_create_single("psched", 0, net->proc_net, psched_show); if (e == NULL) return -ENOMEM; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 16644b3d2362..56c181c3feeb 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, extack); if (IS_ERR(child)) return PTR_ERR(child); - } - if (child != &noop_qdisc) + /* child is fifo, no need to check for noop_qdisc */ qdisc_hash_add(child, true); + } + sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 03225a8df973..6f74a426f159 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, err = PTR_ERR(child); goto done; } + + /* child is fifo, no need to check for noop_qdisc */ + qdisc_hash_add(child, true); } sch_tree_lock(sch); @@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; - if (child != &noop_qdisc) - qdisc_hash_add(child, true); } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 42247110d842..0cd2e764f47f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1006,7 +1006,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index fd2684ad94c8..a6179b26b80c 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -108,25 +108,13 @@ static const struct seq_operations sctp_objcnt_seq_ops = { .show = sctp_objcnt_seq_show, }; -static int sctp_objcnt_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &sctp_objcnt_seq_ops); -} - -static const struct file_operations sctp_objcnt_ops = { - .open = sctp_objcnt_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* Initialize the objcount in the proc filesystem. */ void sctp_dbg_objcnt_init(struct net *net) { struct proc_dir_entry *ent; - ent = proc_create("sctp_dbg_objcnt", 0, - net->sctp.proc_net_sctp, &sctp_objcnt_ops); + ent = proc_create_seq("sctp_dbg_objcnt", 0, + net->sctp.proc_net_sctp, &sctp_objcnt_seq_ops); if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1d9ccc6dab2b..ef5c9a82d4e8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -88,19 +88,6 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) return 0; } -/* Initialize the seq file operations for 'snmp' object. */ -static int sctp_snmp_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, sctp_snmp_seq_show); -} - -static const struct file_operations sctp_snmp_seq_fops = { - .open = sctp_snmp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - /* Dump local addresses of an association/endpoint. */ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) { @@ -225,21 +212,6 @@ static const struct seq_operations sctp_eps_ops = { .show = sctp_eps_seq_show, }; - -/* Initialize the seq file operations for 'eps' object. */ -static int sctp_eps_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &sctp_eps_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations sctp_eps_seq_fops = { - .open = sctp_eps_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; @@ -338,20 +310,6 @@ static const struct seq_operations sctp_assoc_ops = { .show = sctp_assocs_seq_show, }; -/* Initialize the seq file operations for 'assocs' object. */ -static int sctp_assocs_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &sctp_assoc_ops, - sizeof(struct sctp_ht_iter)); -} - -static const struct file_operations sctp_assocs_seq_fops = { - .open = sctp_assocs_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -431,36 +389,23 @@ static const struct seq_operations sctp_remaddr_ops = { .show = sctp_remaddr_seq_show, }; -static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &sctp_remaddr_ops, - sizeof(struct sctp_ht_iter)); -} - -static const struct file_operations sctp_remaddr_seq_fops = { - .open = sctp_remaddr_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - /* Set up the proc fs entry for the SCTP protocol. */ int __net_init sctp_proc_init(struct net *net) { net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); if (!net->sctp.proc_net_sctp) return -ENOMEM; - if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp, - &sctp_snmp_seq_fops)) + if (!proc_create_net_single("snmp", 0444, net->sctp.proc_net_sctp, + sctp_snmp_seq_show, NULL)) goto cleanup; - if (!proc_create("eps", 0444, net->sctp.proc_net_sctp, - &sctp_eps_seq_fops)) + if (!proc_create_net("eps", 0444, net->sctp.proc_net_sctp, + &sctp_eps_ops, sizeof(struct seq_net_private))) goto cleanup; - if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp, - &sctp_assocs_seq_fops)) + if (!proc_create_net("assocs", 0444, net->sctp.proc_net_sctp, + &sctp_assoc_ops, sizeof(struct sctp_ht_iter))) goto cleanup; - if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp, - &sctp_remaddr_seq_fops)) + if (!proc_create_net("remaddr", 0444, net->sctp.proc_net_sctp, + &sctp_remaddr_ops, sizeof(struct sctp_ht_iter))) goto cleanup; return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d685f8456762..6bf0a9971888 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1012,7 +1012,7 @@ static const struct proto_ops inet_seqpacket_ops = { .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ .bind = inet_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 80835ac26d2c..ae7e7c606f72 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1086,7 +1086,7 @@ out: */ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, - int addrs_size, + int addrs_size, int flags, sctp_assoc_t *assoc_id) { struct net *net = sock_net(sk); @@ -1104,7 +1104,6 @@ static int __sctp_connect(struct sock *sk, union sctp_addr *sa_addr = NULL; void *addr_buf; unsigned short port; - unsigned int f_flags = 0; sp = sctp_sk(sk); ep = sp->ep; @@ -1254,13 +1253,7 @@ static int __sctp_connect(struct sock *sk, sp->pf->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; - /* in-kernel sockets don't generally have a file allocated to them - * if all they do is call sock_create_kern(). - */ - if (sk->sk_socket->file) - f_flags = sk->sk_socket->file->f_flags; - - timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (assoc_id) *assoc_id = asoc->assoc_id; @@ -1348,7 +1341,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, sctp_assoc_t *assoc_id) { struct sockaddr *kaddrs; - int err = 0; + int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); @@ -1367,7 +1360,13 @@ static int __sctp_setsockopt_connectx(struct sock *sk, if (err) goto out_free; - err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); + /* in-kernel sockets don't generally have a file allocated to them + * if all they do is call sock_create_kern(). + */ + if (sk->sk_socket->file) + flags = sk->sk_socket->file->f_flags; + + err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); out_free: kvfree(kaddrs); @@ -4397,16 +4396,26 @@ out_nounlock: * len: the size of the address. */ static int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) + int addr_len, int flags) { - int err = 0; + struct inet_sock *inet = inet_sk(sk); struct sctp_af *af; + int err = 0; lock_sock(sk); pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); + /* We may need to bind the socket. */ + if (!inet->inet_num) { + if (sk->sk_prot->get_port(sk, 0)) { + release_sock(sk); + return -EAGAIN; + } + inet->inet_sport = htons(inet->inet_num); + } + /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { @@ -4415,13 +4424,25 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, /* Pass correct addr len to common routine (so it knows there * is only one address being passed. */ - err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); + err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); } release_sock(sk); return err; } +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return -EOPNOTSUPP; + + return sctp_connect(sock->sk, uaddr, addr_len, flags); +} + /* FIXME: Write comments. */ static int sctp_disconnect(struct sock *sk, int flags) { @@ -8724,7 +8745,6 @@ struct proto sctp_prot = { .name = "SCTP", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8767,7 +8787,6 @@ struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 74568cdbca70..d7b88b2d1b22 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -245,40 +245,45 @@ out: static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, struct nlattr *tb[]) { - char *string, *ibname = NULL; - int rc = 0; + char *string, *ibname; + int rc; memset(pnetelem, 0, sizeof(*pnetelem)); INIT_LIST_HEAD(&pnetelem->list); - if (tb[SMC_PNETID_NAME]) { - string = (char *)nla_data(tb[SMC_PNETID_NAME]); - if (!smc_pnetid_valid(string, pnetelem->pnet_name)) { - rc = -EINVAL; - goto error; - } - } - if (tb[SMC_PNETID_ETHNAME]) { - string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); - pnetelem->ndev = dev_get_by_name(net, string); - if (!pnetelem->ndev) - return -ENOENT; - } - if (tb[SMC_PNETID_IBNAME]) { - ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); - ibname = strim(ibname); - pnetelem->smcibdev = smc_pnet_find_ib(ibname); - if (!pnetelem->smcibdev) { - rc = -ENOENT; - goto error; - } - } - if (tb[SMC_PNETID_IBPORT]) { - pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); - if (pnetelem->ib_port > SMC_MAX_PORTS) { - rc = -EINVAL; - goto error; - } - } + + rc = -EINVAL; + if (!tb[SMC_PNETID_NAME]) + goto error; + string = (char *)nla_data(tb[SMC_PNETID_NAME]); + if (!smc_pnetid_valid(string, pnetelem->pnet_name)) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_ETHNAME]) + goto error; + rc = -ENOENT; + string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); + pnetelem->ndev = dev_get_by_name(net, string); + if (!pnetelem->ndev) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_IBNAME]) + goto error; + rc = -ENOENT; + ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); + ibname = strim(ibname); + pnetelem->smcibdev = smc_pnet_find_ib(ibname); + if (!pnetelem->smcibdev) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_IBPORT]) + goto error; + pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); + if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) + goto error; + return 0; error: @@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) void *hdr; int rc; + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; pnetelem = smc_pnet_find_pnetid( (char *)nla_data(info->attrs[SMC_PNETID_NAME])); if (!pnetelem) @@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) { + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; return smc_pnet_remove_by_pnetid( (char *)nla_data(info->attrs[SMC_PNETID_NAME])); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c81ef5e6c981..4fda18d47e2c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -609,22 +609,6 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) return ret; } -int rpc_rmdir(struct dentry *dentry) -{ - struct dentry *parent; - struct inode *dir; - int error; - - parent = dget_parent(dentry); - dir = d_inode(parent); - inode_lock_nested(dir, I_MUTEX_PARENT); - error = __rpc_rmdir(dir, dentry); - inode_unlock(dir); - dput(parent); - return error; -} -EXPORT_SYMBOL_GPL(rpc_rmdir); - static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 71e79597f940..e1c93ce74e0f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -680,7 +680,6 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgin = &sgin_arr[0]; struct strp_msg *rxm = strp_msg(skb); int ret, nsg = ARRAY_SIZE(sgin_arr); - char aad_recv[TLS_AAD_SPACE_SIZE]; struct sk_buff *unused; ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, @@ -698,13 +697,13 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, } sg_init_table(sgin, nsg); - sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv)); + sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE); nsg = skb_to_sgvec(skb, &sgin[1], rxm->offset + tls_ctx->rx.prepend_size, rxm->full_len - tls_ctx->rx.prepend_size); - tls_make_aad(aad_recv, + tls_make_aad(ctx->rx_aad_ciphertext, rxm->full_len - tls_ctx->rx.overhead_size, tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, @@ -803,12 +802,12 @@ int tls_sw_recvmsg(struct sock *sk, if (to_copy <= len && page_count < MAX_SKB_FRAGS && likely(!(flags & MSG_PEEK))) { struct scatterlist sgin[MAX_SKB_FRAGS + 1]; - char unused[21]; int pages = 0; zc = true; sg_init_table(sgin, MAX_SKB_FRAGS + 1); - sg_set_buf(&sgin[0], unused, 13); + sg_set_buf(&sgin[0], ctx->rx_aad_plaintext, + TLS_AAD_SPACE_SIZE); err = zerocopy_from_iter(sk, &msg->msg_iter, to_copy, &pages, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 68bb70a62afe..e5473c03d667 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2852,20 +2852,6 @@ static const struct seq_operations unix_seq_ops = { .stop = unix_seq_stop, .show = unix_seq_show, }; - -static int unix_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &unix_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations unix_seq_fops = { - .open = unix_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - #endif static const struct net_proto_family unix_family_ops = { @@ -2884,7 +2870,8 @@ static int __net_init unix_net_init(struct net *net) goto out; #ifdef CONFIG_PROC_FS - if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) { + if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, + sizeof(struct seq_net_private))) { unix_sysctl_unregister(net); goto out; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a052693c2e85..7c5135a92d76 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -15555,7 +15555,8 @@ void cfg80211_ft_event(struct net_device *netdev, if (!ft_event->target_ap) return; - msg = nlmsg_new(100 + ft_event->ric_ies_len, GFP_KERNEL); + msg = nlmsg_new(100 + ft_event->ies_len + ft_event->ric_ies_len, + GFP_KERNEL); if (!msg) return; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ac3e12c32aa3..5fcec5c94eb7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -916,6 +916,9 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr, const struct fwdb_header *hdr = regdb; const struct fwdb_country *country; + if (!regdb) + return -ENODATA; + if (IS_ERR(regdb)) return PTR_ERR(regdb); diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index b4c464594a5e..cadcf8613af2 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -126,24 +126,11 @@ static const struct seq_operations wireless_seq_ops = { .show = wireless_dev_seq_show, }; -static int seq_open_wireless(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &wireless_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations wireless_seq_fops = { - .open = seq_open_wireless, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_create("wireless", 0444, net->proc_net, - &wireless_seq_fops)) + if (!proc_create_net("wireless", 0444, net->proc_net, + &wireless_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 64b415e93f6a..da52c9dc256c 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -171,57 +171,21 @@ static const struct seq_operations x25_seq_forward_ops = { .show = x25_seq_forward_show, }; -static int x25_seq_socket_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &x25_seq_socket_ops); -} - -static int x25_seq_route_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &x25_seq_route_ops); -} - -static int x25_seq_forward_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &x25_seq_forward_ops); -} - -static const struct file_operations x25_seq_socket_fops = { - .open = x25_seq_socket_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations x25_seq_route_fops = { - .open = x25_seq_route_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations x25_seq_forward_fops = { - .open = x25_seq_forward_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - int __init x25_proc_init(void) { if (!proc_mkdir("x25", init_net.proc_net)) return -ENOMEM; - if (!proc_create("x25/route", 0444, init_net.proc_net, - &x25_seq_route_fops)) + if (!proc_create_seq("x25/route", 0444, init_net.proc_net, + &x25_seq_route_ops)) goto out; - if (!proc_create("x25/socket", 0444, init_net.proc_net, - &x25_seq_socket_fops)) + if (!proc_create_seq("x25/socket", 0444, init_net.proc_net, + &x25_seq_socket_ops)) goto out; - if (!proc_create("x25/forward", 0444, init_net.proc_net, - &x25_seq_forward_fops)) + if (!proc_create_seq("x25/forward", 0444, init_net.proc_net, + &x25_seq_forward_ops)) goto out; return 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 40b54cc64243..5f48251c1319 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1658,7 +1658,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len; } -out: return &xdst0->u.dst; put_states: @@ -1667,8 +1666,8 @@ put_states: free_dst: if (xdst0) dst_release_immediate(&xdst0->u.dst); - xdst0 = ERR_PTR(err); - goto out; + + return ERR_PTR(err); } static int xfrm_expand_policies(const struct flowi *fl, u16 family, diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index ed06903cd84d..178318d2e120 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -65,22 +65,10 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) return 0; } -static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, xfrm_statistics_seq_show); -} - -static const struct file_operations xfrm_statistics_seq_fops = { - .open = xfrm_statistics_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release_net, -}; - int __net_init xfrm_proc_init(struct net *net) { - if (!proc_create("xfrm_stat", 0444, net->proc_net, - &xfrm_statistics_seq_fops)) + if (!proc_create_net_single("xfrm_stat", 0444, net->proc_net, + xfrm_statistics_seq_show, NULL)) return -ENOMEM; return 0; } diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e16d6713f236..2d42eb9cd1a5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5041,7 +5041,7 @@ sub process { $tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g; $tmp_stmt =~ s/\#+\s*$arg\b//g; $tmp_stmt =~ s/\b$arg\s*\#\#//g; - my $use_cnt = $tmp_stmt =~ s/\b$arg\b//g; + my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g; if ($use_cnt > 1) { CHK("MACRO_ARG_REUSE", "Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx"); diff --git a/security/keys/proc.c b/security/keys/proc.c index fbc4af5c6c9f..5af2934965d8 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -18,7 +18,6 @@ #include <asm/errno.h> #include "internal.h" -static int proc_keys_open(struct inode *inode, struct file *file); static void *proc_keys_start(struct seq_file *p, loff_t *_pos); static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos); static void proc_keys_stop(struct seq_file *p, void *v); @@ -31,14 +30,6 @@ static const struct seq_operations proc_keys_ops = { .show = proc_keys_show, }; -static const struct file_operations proc_keys_fops = { - .open = proc_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int proc_key_users_open(struct inode *inode, struct file *file); static void *proc_key_users_start(struct seq_file *p, loff_t *_pos); static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos); static void proc_key_users_stop(struct seq_file *p, void *v); @@ -51,13 +42,6 @@ static const struct seq_operations proc_key_users_ops = { .show = proc_key_users_show, }; -static const struct file_operations proc_key_users_fops = { - .open = proc_key_users_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * Declare the /proc files. */ @@ -65,11 +49,11 @@ static int __init key_proc_init(void) { struct proc_dir_entry *p; - p = proc_create("keys", 0, NULL, &proc_keys_fops); + p = proc_create_seq("keys", 0, NULL, &proc_keys_ops); if (!p) panic("Cannot create /proc/keys\n"); - p = proc_create("key-users", 0, NULL, &proc_key_users_fops); + p = proc_create_seq("key-users", 0, NULL, &proc_key_users_ops); if (!p) panic("Cannot create /proc/key-users\n"); @@ -96,11 +80,6 @@ static struct rb_node *key_serial_next(struct seq_file *p, struct rb_node *n) return n; } -static int proc_keys_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_keys_ops); -} - static struct key *find_ge_key(struct seq_file *p, key_serial_t id) { struct user_namespace *user_ns = seq_user_ns(p); @@ -293,15 +272,6 @@ static struct rb_node *key_user_first(struct user_namespace *user_ns, struct rb_ return __key_user_next(user_ns, n); } -/* - * Implement "/proc/key-users" to provides a list of the key users and their - * quotas. - */ -static int proc_key_users_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_key_users_ops); -} - static void *proc_key_users_start(struct seq_file *p, loff_t *_pos) __acquires(key_user_lock) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index be5817df0a9d..99c4675952f7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -274,11 +274,10 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent * Try reloading inode security labels that have been marked as invalid. The * @may_sleep parameter indicates when sleeping and thus reloading labels is * allowed; when set to false, returns -ECHILD when the label is - * invalid. The @opt_dentry parameter should be set to a dentry of the inode; - * when no dentry is available, set it to NULL instead. + * invalid. The @dentry parameter should be set to a dentry of the inode. */ static int __inode_security_revalidate(struct inode *inode, - struct dentry *opt_dentry, + struct dentry *dentry, bool may_sleep) { struct inode_security_struct *isec = inode->i_security; @@ -295,7 +294,7 @@ static int __inode_security_revalidate(struct inode *inode, * @opt_dentry is NULL and no dentry for this inode can be * found; in that case, continue using the old label. */ - inode_doinit_with_dentry(inode, opt_dentry); + inode_doinit_with_dentry(inode, dentry); } return 0; } @@ -1568,8 +1567,15 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent /* Called from d_instantiate or d_splice_alias. */ dentry = dget(opt_dentry); } else { - /* Called from selinux_complete_init, try to find a dentry. */ + /* + * Called from selinux_complete_init, try to find a dentry. + * Some filesystems really want a connected one, so try + * that first. We could split SECURITY_FS_USE_XATTR in + * two, depending upon that... + */ dentry = d_find_alias(inode); + if (!dentry) + dentry = d_find_any_alias(inode); } if (!dentry) { /* @@ -1674,14 +1680,19 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if ((sbsec->flags & SE_SBGENFS) && !S_ISLNK(inode->i_mode)) { /* We must have a dentry to determine the label on * procfs inodes */ - if (opt_dentry) + if (opt_dentry) { /* Called from d_instantiate or * d_splice_alias. */ dentry = dget(opt_dentry); - else + } else { /* Called from selinux_complete_init, try to - * find a dentry. */ + * find a dentry. Some filesystems really want + * a connected one, so try that first. + */ dentry = d_find_alias(inode); + if (!dentry) + dentry = d_find_any_alias(inode); + } /* * This can be hit on boot when a file is accessed * before the policy is loaded. When we load policy we diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8057e19dc15f..3ce225e3f142 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1494,7 +1494,7 @@ static int security_context_to_sid_core(struct selinux_state *state, scontext_len, &context, def_sid); if (rc == -EINVAL && force) { context.str = str; - context.len = scontext_len; + context.len = strlen(str) + 1; str = NULL; } else if (rc) goto out_unlock; diff --git a/sound/core/timer.c b/sound/core/timer.c index dc87728c5b74..0ddcae495838 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -592,7 +592,7 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) else timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : - SNDRV_TIMER_EVENT_CONTINUE); + SNDRV_TIMER_EVENT_PAUSE); unlock: spin_unlock_irqrestore(&timer->lock, flags); return result; @@ -614,7 +614,7 @@ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop) list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : - SNDRV_TIMER_EVENT_CONTINUE); + SNDRV_TIMER_EVENT_PAUSE); spin_unlock(&timeri->timer->lock); } spin_unlock_irqrestore(&slave_active_lock, flags); diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 321e78baa63c..9bd935216c18 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -622,8 +622,10 @@ snd_hda_check_power_state(struct hda_codec *codec, hda_nid_t nid, { return snd_hdac_check_power_state(&codec->core, nid, target_state); } -static inline bool snd_hda_sync_power_state(struct hda_codec *codec, - hda_nid_t nid, unsigned int target_state) + +static inline unsigned int snd_hda_sync_power_state(struct hda_codec *codec, + hda_nid_t nid, + unsigned int target_state) { return snd_hdac_sync_power_state(&codec->core, nid, target_state); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c5ec89732a8d..8c317737ba3f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1017,6 +1017,7 @@ struct bpf_prog_info { __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); @@ -1030,6 +1031,7 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5922443063f0..0f9f06df49bc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2035,7 +2035,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return -EINVAL; obj = bpf_object__open(attr->file); - if (IS_ERR(obj)) + if (IS_ERR_OR_NULL(obj)) return -ENOENT; bpf_object__for_each_program(prog, obj) { diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index d00f0d51cab8..dfb218feaad9 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m) A structure defining the number of CPUs. struct nr_cpus { - uint32_t nr_cpus_online; uint32_t nr_cpus_available; /* CPUs not yet onlined */ + uint32_t nr_cpus_online; }; HEADER_CPUDESC = 8, @@ -153,10 +153,18 @@ struct { HEADER_CPU_TOPOLOGY = 13, String lists defining the core and CPU threads topology. +The string lists are followed by a variable length array +which contains core_id and socket_id of each cpu. +The number of entries can be determined by the size of the +section minus the sizes of both string lists. struct { struct perf_header_string_list cores; /* Variable length */ struct perf_header_string_list threads; /* Variable length */ + struct { + uint32_t core_id; + uint32_t socket_id; + } cpus[nr]; /* Variable length records */ }; Example: diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 17cb1bb3448c..40e30a26b23c 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -70,6 +70,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map) session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", session); + /* On platforms with large numbers of CPUs process_cpu_topology() + * might issue an error while reading the perf.data file section + * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member + * cpu is a NULL pointer. + * Example: On s390 + * CPU 0 is on core_id 0 and physical_package_id 6 + * CPU 1 is on core_id 1 and physical_package_id 3 + * + * Core_id and physical_package_id are platform and architecture + * dependend and might have higher numbers than the CPU id. + * This actually depends on the configuration. + * + * In this case process_cpu_topology() prints error message: + * "socket_id number is too big. You may need to upgrade the + * perf tool." + * + * This is the reason why this test might be skipped. + */ + if (!session->header.env.cpu) + return TEST_SKIP; + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -95,7 +116,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe { char path[PATH_MAX]; struct cpu_map *map; - int ret = -1; + int ret = TEST_FAIL; TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); @@ -110,12 +131,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe goto free_path; } - if (check_cpu_topology(path, map)) - goto free_map; - ret = 0; - -free_map: + ret = check_cpu_topology(path, map); cpu_map__put(map); + free_path: unlink(path); return ret; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index af7ad814b2c3..cee658733e2c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) } obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load buffer\n"); return ERR_PTR(-EINVAL); } @@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) pr_debug("bpf: successfull builtin compilation\n"); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); - if (!IS_ERR(obj) && llvm_param.dump_obj) + if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) llvm__dump_obj(filename, obj_buf, obj_buf_sz); free(obj_buf); } else obj = bpf_object__open(filename); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load %s\n", filename); return obj; } diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index c8b98fa22997..4d5fc374e730 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -96,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, /* Nothing to do, might as well just return */ if (decoder->packet_count == 0) return 0; + /* + * The queueing process in function cs_etm_decoder__buffer_packet() + * increments the tail *before* using it. This is somewhat counter + * intuitive but it has the advantage of centralizing tail management + * at a single location. Because of that we need to follow the same + * heuristic with the head, i.e we increment it before using its + * value. Otherwise the first element of the packet queue is not + * used. + */ + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); *packet = decoder->packet_buffer[decoder->head]; - decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); - decoder->packet_count--; return 1; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 92ec009a292d..b13f5f234c8f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -127,6 +127,7 @@ struct perf_evsel { bool precise_max; bool ignore_missing_thread; bool forced_leader; + bool use_uncore_alias; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b8b8a9558d32..2fc4ee8b86c1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1219,13 +1219,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config, bool auto_merge_stats) + struct list_head *head_config, + bool auto_merge_stats, + bool use_alias) { struct perf_event_attr attr; struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; struct parse_events_error *err = parse_state->error; + bool use_uncore_alias; LIST_HEAD(config_terms); pmu = perf_pmu__find(name); @@ -1244,11 +1247,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, memset(&attr, 0, sizeof(attr)); } + use_uncore_alias = (pmu->is_uncore && use_alias); + if (!head_config) { attr.type = pmu->type; evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); if (evsel) { evsel->pmu_name = name; + evsel->use_uncore_alias = use_uncore_alias; return 0; } else { return -ENOMEM; @@ -1282,6 +1288,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; evsel->pmu_name = name; + evsel->use_uncore_alias = use_uncore_alias; } return evsel ? 0 : -ENOMEM; @@ -1317,7 +1324,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, list_add_tail(&term->list, head); if (!parse_events_add_pmu(parse_state, list, - pmu->name, head, true)) { + pmu->name, head, + true, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; @@ -1339,7 +1347,120 @@ int parse_events__modifier_group(struct list_head *list, return parse_events__modifier_event(list, event_mod, true); } -void parse_events__set_leader(char *name, struct list_head *list) +/* + * Check if the two uncore PMUs are from the same uncore block + * The format of the uncore PMU name is uncore_#blockname_#pmuidx + */ +static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b) +{ + char *end_a, *end_b; + + end_a = strrchr(pmu_name_a, '_'); + end_b = strrchr(pmu_name_b, '_'); + + if (!end_a || !end_b) + return false; + + if ((end_a - pmu_name_a) != (end_b - pmu_name_b)) + return false; + + return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0); +} + +static int +parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, + struct parse_events_state *parse_state) +{ + struct perf_evsel *evsel, *leader; + uintptr_t *leaders; + bool is_leader = true; + int i, nr_pmu = 0, total_members, ret = 0; + + leader = list_first_entry(list, struct perf_evsel, node); + evsel = list_last_entry(list, struct perf_evsel, node); + total_members = evsel->idx - leader->idx + 1; + + leaders = calloc(total_members, sizeof(uintptr_t)); + if (WARN_ON(!leaders)) + return 0; + + /* + * Going through the whole group and doing sanity check. + * All members must use alias, and be from the same uncore block. + * Also, storing the leader events in an array. + */ + __evlist__for_each_entry(list, evsel) { + + /* Only split the uncore group which members use alias */ + if (!evsel->use_uncore_alias) + goto out; + + /* The events must be from the same uncore block */ + if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name)) + goto out; + + if (!is_leader) + continue; + /* + * If the event's PMU name starts to repeat, it must be a new + * event. That can be used to distinguish the leader from + * other members, even they have the same event name. + */ + if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) { + is_leader = false; + continue; + } + /* The name is always alias name */ + WARN_ON(strcmp(leader->name, evsel->name)); + + /* Store the leader event for each PMU */ + leaders[nr_pmu++] = (uintptr_t) evsel; + } + + /* only one event alias */ + if (nr_pmu == total_members) { + parse_state->nr_groups--; + goto handled; + } + + /* + * An uncore event alias is a joint name which means the same event + * runs on all PMUs of a block. + * Perf doesn't support mixed events from different PMUs in the same + * group. The big group has to be split into multiple small groups + * which only include the events from the same PMU. + * + * Here the uncore event aliases must be from the same uncore block. + * The number of PMUs must be same for each alias. The number of new + * small groups equals to the number of PMUs. + * Setting the leader event for corresponding members in each group. + */ + i = 0; + __evlist__for_each_entry(list, evsel) { + if (i >= nr_pmu) + i = 0; + evsel->leader = (struct perf_evsel *) leaders[i++]; + } + + /* The number of members and group name are same for each group */ + for (i = 0; i < nr_pmu; i++) { + evsel = (struct perf_evsel *) leaders[i]; + evsel->nr_members = total_members / nr_pmu; + evsel->group_name = name ? strdup(name) : NULL; + } + + /* Take the new small groups into account */ + parse_state->nr_groups += nr_pmu - 1; + +handled: + ret = 1; +out: + free(leaders); + return ret; +} + +void parse_events__set_leader(char *name, struct list_head *list, + struct parse_events_state *parse_state) { struct perf_evsel *leader; @@ -1348,6 +1469,9 @@ void parse_events__set_leader(char *name, struct list_head *list) return; } + if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) + return; + __perf_evlist__set_leader(list); leader = list_entry(list->next, struct perf_evsel, node); leader->group_name = name ? strdup(name) : NULL; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 5015cfd58277..4473dac27aee 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config, bool auto_merge_stats); + struct list_head *head_config, + bool auto_merge_stats, + bool use_alias); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, @@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old, enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); -void parse_events__set_leader(char *name, struct list_head *list); +void parse_events__set_leader(char *name, struct list_head *list, + struct parse_events_state *parse_state); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_evlist_error(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 7afeb80cc39e..e37608a87dba 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -161,7 +161,7 @@ PE_NAME '{' events '}' struct list_head *list = $3; inc_group_count(list, _parse_state); - parse_events__set_leader($1, list); + parse_events__set_leader($1, list, _parse_state); $$ = list; } | @@ -170,7 +170,7 @@ PE_NAME '{' events '}' struct list_head *list = $2; inc_group_count(list, _parse_state); - parse_events__set_leader(NULL, list); + parse_events__set_leader(NULL, list, _parse_state); $$ = list; } @@ -232,7 +232,7 @@ PE_NAME opt_event_config YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) { struct perf_pmu *pmu = NULL; int ok = 0; char *pattern; @@ -251,7 +251,7 @@ PE_NAME opt_event_config free(pattern); YYABORT; } - if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true)) + if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false)) ok++; parse_events_terms__delete(terms); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 10dd5fce082b..7f8afacd08ee 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -531,6 +531,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->period)); pydict_set_item_string_decref(dict_sample, "phys_addr", PyLong_FromUnsignedLongLong(sample->phys_addr)); + pydict_set_item_string_decref(dict_sample, "addr", + PyLong_FromUnsignedLongLong(sample->addr)); set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 6c645eb77d42..ee820fcc29b0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -252,6 +252,13 @@ void idr_checks(void) idr_remove(&idr, 3); idr_remove(&idr, 0); + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0); + idr_remove(&idr, 1); + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i); + idr_remove(&idr, 1 << 30); + idr_destroy(&idr); + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 983dd25d49f4..1eefe211a4a8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..fd7de7eb329e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11713,6 +11713,11 @@ static void get_unpriv_disabled() FILE *fd; fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (!fd) { + perror("fopen /proc/sys/"UNPRIV_SYSCTL); + unpriv_disabled = true; + return; + } if (fgets(buf, 2, fd) == buf && atoi(buf)) unpriv_disabled = true; fclose(fd); diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6a75a3ea44ad..7ba089b33e8b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_NET_IPVTI=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_DUMMY=y diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 365c32e84189..c9f478b40996 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,6 +23,8 @@ #include <unistd.h> #include <numa.h> +#include "../kselftest.h" + static const int PORT = 8888; static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) @@ -229,7 +231,7 @@ int main(void) int *rcv_fd, nodes; if (numa_available() < 0) - error(1, errno, "no numa api support"); + ksft_exit_skip("no numa api support\n"); nodes = numa_max_node() + 1; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 168c66d74fc5..e1473234968d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -134,11 +134,15 @@ struct seccomp_data { #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif #ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef PTRACE_SECCOMP_GET_METADATA @@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock) TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, - SECCOMP_FILTER_FLAG_LOG }; + SECCOMP_FILTER_FLAG_LOG, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; unsigned int flag, all_flags; int i; long ret; /* Test detection of known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 1571e24e9494..f91aeb5fe571 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -6,8 +6,6 @@ # error Virtio userspace code does not support CONFIG_HAS_DMA #endif -#define PCI_DMA_BUS_IS_PHYS 1 - enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, |